From 3d28da78f3e8bab7a13f4f928cdd7747ab943197 Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Mon, 30 Oct 2017 11:55:42 +0100 Subject: stdlib: Optimize base64 functions A few test cases with zeroes are added. They were not handled correctly before. The access of DECODE_MAP is moved into the inlined function b64d, for symmetry. The function b64e is also inlined. The speed-up is small, but measurable. Note: encode(List), decode(List) and mime_decode(List) no longer call list_to_binary. This can break code that calls the functions with I/O-lists as input. --- lib/stdlib/src/base64.erl | 699 ++++++++++++++++++++++++--------------- lib/stdlib/test/base64_SUITE.erl | 96 +++--- 2 files changed, 489 insertions(+), 306 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl index c8cf6fdffe..6311f3c86b 100644 --- a/lib/stdlib/src/base64.erl +++ b/lib/stdlib/src/base64.erl @@ -24,89 +24,95 @@ -export([encode/1, decode/1, mime_decode/1, encode_to_string/1, decode_to_string/1, mime_decode_to_string/1]). -%%------------------------------------------------------------------------- %% The following type is a subtype of string() for return values %% of (some) functions of this module. -%%------------------------------------------------------------------------- - -type ascii_string() :: [1..255]. -type ascii_binary() :: binary(). -%%------------------------------------------------------------------------- -%% encode_to_string(ASCII) -> Base64String -%% ASCII - string() | binary() -%% Base64String - string() -%% -%% Description: Encodes a plain ASCII string (or binary) into base64. -%%------------------------------------------------------------------------- - -spec encode_to_string(Data) -> Base64String when Data :: ascii_string() | ascii_binary(), Base64String :: ascii_string(). encode_to_string(Bin) when is_binary(Bin) -> - encode_to_string(binary_to_list(Bin)); + encode_binary_to_string(Bin); encode_to_string(List) when is_list(List) -> - encode_l(List). - -%%------------------------------------------------------------------------- -%% encode(ASCII) -> Base64 -%% ASCII - string() | binary() -%% Base64 - binary() -%% -%% Description: Encodes a plain ASCII string (or binary) into base64. -%%------------------------------------------------------------------------- + encode_list_to_string(List). -spec encode(Data) -> Base64 when Data :: ascii_string() | ascii_binary(), Base64 :: ascii_binary(). encode(Bin) when is_binary(Bin) -> - encode_binary(Bin); + encode_binary(Bin, <<>>); encode(List) when is_list(List) -> - list_to_binary(encode_l(List)). + encode_list(List, <<>>). --spec encode_l(ascii_string()) -> ascii_string(). +encode_binary_to_string(<<>>) -> + []; +encode_binary_to_string(<>) -> + [b64e(B1 bsr 2), + b64e((B1 band 3) bsl 4), $=, $=]; +encode_binary_to_string(<>) -> + [b64e(B1 bsr 2), + b64e(((B1 band 3) bsl 4) bor (B2 bsr 4)), + b64e((B2 band 15) bsl 2), $=]; +encode_binary_to_string(<>) -> + BB = (B1 bsl 16) bor (B2 bsl 8) bor B3, + [b64e(BB bsr 18), + b64e((BB bsr 12) band 63), + b64e((BB bsr 6) band 63), + b64e(BB band 63) | encode_binary_to_string(Ls)]. -encode_l([]) -> +encode_list_to_string([]) -> []; -encode_l([A]) -> - [b64e(A bsr 2), - b64e((A band 3) bsl 4), $=, $=]; -encode_l([A,B]) -> - [b64e(A bsr 2), - b64e(((A band 3) bsl 4) bor (B bsr 4)), - b64e((B band 15) bsl 2), $=]; -encode_l([A,B,C|Ls]) -> - BB = (A bsl 16) bor (B bsl 8) bor C, +encode_list_to_string([B1]) -> + [b64e(B1 bsr 2), + b64e((B1 band 3) bsl 4), $=, $=]; +encode_list_to_string([B1,B2]) -> + [b64e(B1 bsr 2), + b64e(((B1 band 3) bsl 4) bor (B2 bsr 4)), + b64e((B2 band 15) bsl 2), $=]; +encode_list_to_string([B1,B2,B3|Ls]) -> + BB = (B1 bsl 16) bor (B2 bsl 8) bor B3, [b64e(BB bsr 18), b64e((BB bsr 12) band 63), b64e((BB bsr 6) band 63), - b64e(BB band 63) | encode_l(Ls)]. - -encode_binary(Bin) -> - Split = 3*(byte_size(Bin) div 3), - <> = Bin, - Main = << <<(b64e(C)):8>> || <> <= Main0 >>, - case Rest of - <> -> - <
>; - <> -> - <
>; - <<>> -> - Main - end. + b64e(BB band 63) | encode_list_to_string(Ls)]. -%%------------------------------------------------------------------------- -%% mime_decode(Base64) -> ASCII -%% decode(Base64) -> ASCII -%% Base64 - string() | binary() -%% ASCII - binary() -%% -%% Description: Decodes an base64 encoded string to plain ASCII. -%% mime_decode strips away all characters not Base64 before converting, -%% whereas decode crashes if an illegal character is found -%%------------------------------------------------------------------------- +encode_binary(<<>>, A) -> + A; +encode_binary(<>, A) -> + <>; +encode_binary(<>, A) -> + <>; +encode_binary(<>, A) -> + BB = (B1 bsl 16) bor (B2 bsl 8) bor B3, + encode_binary(Ls, + <>). + +encode_list([], A) -> + A; +encode_list([B1], A) -> + <>; +encode_list([B1,B2], A) -> + <>; +encode_list([B1,B2,B3|Ls], A) -> + BB = (B1 bsl 16) bor (B2 bsl 8) bor B3, + encode_list(Ls, + <>). + +%% mime_decode strips away all characters not Base64 before +%% converting, whereas decode crashes if an illegal character is found -spec decode(Base64) -> Data when Base64 :: ascii_string() | ascii_binary(), @@ -122,158 +128,280 @@ decode(List) when is_list(List) -> Data :: ascii_binary(). mime_decode(Bin) when is_binary(Bin) -> - mime_decode_binary(<<>>, Bin); + mime_decode_binary(Bin, <<>>); mime_decode(List) when is_list(List) -> - mime_decode(list_to_binary(List)). + mime_decode_list(List, <<>>). --spec decode_l(ascii_string()) -> ascii_string(). - -decode_l(List) -> - L = strip_spaces(List, []), - decode(L, []). - --spec mime_decode_l(ascii_string()) -> ascii_string(). - -mime_decode_l(List) -> - L = strip_illegal(List, [], 0), - decode(L, []). - -%%------------------------------------------------------------------------- -%% mime_decode_to_string(Base64) -> ASCII -%% decode_to_string(Base64) -> ASCII -%% Base64 - string() | binary() -%% ASCII - binary() -%% -%% Description: Decodes an base64 encoded string to plain ASCII. -%% mime_decode strips away all characters not Base64 before converting, -%% whereas decode crashes if an illegal character is found -%%------------------------------------------------------------------------- +%% mime_decode_to_string strips away all characters not Base64 before +%% converting, whereas decode_to_string crashes if an illegal +%% character is found -spec decode_to_string(Base64) -> DataString when Base64 :: ascii_string() | ascii_binary(), DataString :: ascii_string(). decode_to_string(Bin) when is_binary(Bin) -> - decode_to_string(binary_to_list(Bin)); + decode_binary_to_string(Bin); decode_to_string(List) when is_list(List) -> - decode_l(List). + decode_list_to_string(List). -spec mime_decode_to_string(Base64) -> DataString when Base64 :: ascii_string() | ascii_binary(), DataString :: ascii_string(). mime_decode_to_string(Bin) when is_binary(Bin) -> - mime_decode_to_string(binary_to_list(Bin)); + mime_decode_binary_to_string(Bin); mime_decode_to_string(List) when is_list(List) -> - mime_decode_l(List). - -%% One-based decode map. --define(DECODE_MAP, - {bad,bad,bad,bad,bad,bad,bad,bad,ws,ws,bad,bad,ws,bad,bad, %1-15 - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, %16-31 - ws,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,62,bad,bad,bad,63, %32-47 - 52,53,54,55,56,57,58,59,60,61,bad,bad,bad,eq,bad,bad, %48-63 - bad,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, - 15,16,17,18,19,20,21,22,23,24,25,bad,bad,bad,bad,bad, - bad,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, - 41,42,43,44,45,46,47,48,49,50,51,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}). + mime_decode_list_to_string(List). -decode_binary(<>, A) -> - case element(C1, ?DECODE_MAP) of - ws -> decode_binary(Cs, A); - B1 -> decode_binary(Cs, A, B1) +%% Skipping pad character if not at end of string. Also liberal about +%% excess padding and skipping of other illegal (non-base64 alphabet) +%% characters. See section 3.3 of RFC4648 +mime_decode_list([0 | Cs], A) -> + mime_decode_list(Cs, A); +mime_decode_list([C1 | Cs], A) -> + case b64d(C1) of + B1 when is_integer(B1) -> mime_decode_list(Cs, A, B1); + _ -> mime_decode_list(Cs, A) % eq is padding end; -decode_binary(<<>>, A) -> +mime_decode_list([], A) -> A. -decode_binary(<>, A, B1) -> - case element(C2, ?DECODE_MAP) of - ws -> decode_binary(Cs, A, B1); - B2 -> decode_binary(Cs, A, B1, B2) +mime_decode_list([0 | Cs], A, B1) -> + mime_decode_list(Cs, A, B1); +mime_decode_list([C2 | Cs], A, B1) -> + case b64d(C2) of + B2 when is_integer(B2) -> + mime_decode_list(Cs, A, B1, B2); + _ -> mime_decode_list(Cs, A, B1) % eq is padding end. -decode_binary(<>, A, B1, B2) -> - case element(C3, ?DECODE_MAP) of - ws -> decode_binary(Cs, A, B1, B2); - B3 -> decode_binary(Cs, A, B1, B2, B3) +mime_decode_list([0 | Cs], A, B1, B2) -> + mime_decode_list(Cs, A, B1, B2); +mime_decode_list([C3 | Cs], A, B1, B2) -> + case b64d(C3) of + B3 when is_integer(B3) -> + mime_decode_list(Cs, A, B1, B2, B3); + eq=B3 -> + mime_decode_list_after_eq(Cs, A, B1, B2, B3); + _ -> mime_decode_list(Cs, A, B1, B2) end. -decode_binary(<>, A, B1, B2, B3) -> - case element(C4, ?DECODE_MAP) of - ws -> decode_binary(Cs, A, B1, B2, B3); - eq when B3 =:= eq -> only_ws_binary(Cs, <>); - eq -> only_ws_binary(Cs, <>); - B4 -> decode_binary(Cs, <>) +mime_decode_list([0 | Cs], A, B1, B2, B3) -> + mime_decode_list(Cs, A, B1, B2, B3); +mime_decode_list([C4 | Cs], A, B1, B2, B3) -> + case b64d(C4) of + B4 when is_integer(B4) -> + mime_decode_list(Cs, <>); + eq -> + mime_decode_list_after_eq(Cs, A, B1, B2, B3); + _ -> mime_decode_list(Cs, A, B1, B2, B3) end. -only_ws_binary(<<>>, A) -> - A; -only_ws_binary(<>, A) -> - case element(C, ?DECODE_MAP) of - ws -> only_ws_binary(Cs, A); - _ -> erlang:error(function_clause) +mime_decode_list_after_eq([0 | Cs], A, B1, B2, B3) -> + mime_decode_list_after_eq(Cs, A, B1, B2, B3); +mime_decode_list_after_eq([C | Cs], A, B1, B2, B3) -> + case b64d(C) of + B when is_integer(B) -> + %% More valid data, skip the eq as invalid + case B3 of + eq -> mime_decode_list(Cs, A, B1, B2, B); + _ -> mime_decode_list(Cs, <>) + end; + _ -> mime_decode_list_after_eq(Cs, A, B1, B2, B3) + end; +mime_decode_list_after_eq([], A, B1, B2, eq) -> + <>; +mime_decode_list_after_eq([], A, B1, B2, B3) -> + <>. + +mime_decode_binary(<<0:8, Cs/bits>>, A) -> + mime_decode_binary(Cs, A); +mime_decode_binary(<>, A) -> + case b64d(C1) of + B1 when is_integer(B1) -> mime_decode_binary(Cs, A, B1); + _ -> mime_decode_binary(Cs, A) % eq is padding + end; +mime_decode_binary(<<>>, A) -> + A. + +mime_decode_binary(<<0:8, Cs/bits>>, A, B1) -> + mime_decode_binary(Cs, A, B1); +mime_decode_binary(<>, A, B1) -> + case b64d(C2) of + B2 when is_integer(B2) -> + mime_decode_binary(Cs, A, B1, B2); + _ -> mime_decode_binary(Cs, A, B1) % eq is padding end. -%% Skipping pad character if not at end of string. Also liberal about -%% excess padding and skipping of other illegal (non-base64 alphabet) -%% characters. See section 3.3 of RFC4648 -mime_decode_binary(Result, <<0:8,T/bits>>) -> - mime_decode_binary(Result, T); -mime_decode_binary(Result0, <>) -> - case element(C, ?DECODE_MAP) of - Bits when is_integer(Bits) -> - mime_decode_binary(<>, T); +mime_decode_binary(<<0:8, Cs/bits>>, A, B1, B2) -> + mime_decode_binary(Cs, A, B1, B2); +mime_decode_binary(<>, A, B1, B2) -> + case b64d(C3) of + B3 when is_integer(B3) -> + mime_decode_binary(Cs, A, B1, B2, B3); + eq=B3 -> + mime_decode_binary_after_eq(Cs, A, B1, B2, B3); + _ -> mime_decode_binary(Cs, A, B1, B2) + end. + +mime_decode_binary(<<0:8, Cs/bits>>, A, B1, B2, B3) -> + mime_decode_binary(Cs, A, B1, B2, B3); +mime_decode_binary(<>, A, B1, B2, B3) -> + case b64d(C4) of + B4 when is_integer(B4) -> + mime_decode_binary(Cs, <>); eq -> - mime_decode_binary_after_eq(Result0, T, false); - _ -> - mime_decode_binary(Result0, T) + mime_decode_binary_after_eq(Cs, A, B1, B2, B3); + _ -> mime_decode_binary(Cs, A, B1, B2, B3) + end. + +mime_decode_binary_after_eq(<<0:8, Cs/bits>>, A, B1, B2, B3) -> + mime_decode_binary_after_eq(Cs, A, B1, B2, B3); +mime_decode_binary_after_eq(<>, A, B1, B2, B3) -> + case b64d(C) of + B when is_integer(B) -> + %% More valid data, skip the eq as invalid + case B3 of + eq -> mime_decode_binary(Cs, A, B1, B2, B); + _ -> mime_decode_binary(Cs, <>) + end; + _ -> mime_decode_binary_after_eq(Cs, A, B1, B2, B3) end; -mime_decode_binary(Result, _) -> - true = is_binary(Result), - Result. - -mime_decode_binary_after_eq(Result, <<0:8,T/bits>>, Eq) -> - mime_decode_binary_after_eq(Result, T, Eq); -mime_decode_binary_after_eq(Result0, <>, Eq) -> - case element(C, ?DECODE_MAP) of - bad -> - mime_decode_binary_after_eq(Result0, T, Eq); - ws -> - mime_decode_binary_after_eq(Result0, T, Eq); +mime_decode_binary_after_eq(<<>>, A, B1, B2, eq) -> + <>; +mime_decode_binary_after_eq(<<>>, A, B1, B2, B3) -> + <>. + +mime_decode_list_to_string([0 | Cs]) -> + mime_decode_list_to_string(Cs); +mime_decode_list_to_string([C1 | Cs]) -> + case b64d(C1) of + B1 when is_integer(B1) -> mime_decode_list_to_string(Cs, B1); + _ -> mime_decode_list_to_string(Cs) % eq is padding + end; +mime_decode_list_to_string([]) -> + []. + +mime_decode_list_to_string([0 | Cs], B1) -> + mime_decode_list_to_string(Cs, B1); +mime_decode_list_to_string([C2 | Cs], B1) -> + case b64d(C2) of + B2 when is_integer(B2) -> + mime_decode_list_to_string(Cs, B1, B2); + _ -> mime_decode_list_to_string(Cs, B1) % eq is padding + end. + +mime_decode_list_to_string([0 | Cs], B1, B2) -> + mime_decode_list_to_string(Cs, B1, B2); +mime_decode_list_to_string([C3 | Cs], B1, B2) -> + case b64d(C3) of + B3 when is_integer(B3) -> + mime_decode_list_to_string(Cs, B1, B2, B3); + eq=B3 -> mime_decode_list_to_string_after_eq(Cs, B1, B2, B3); + _ -> mime_decode_list_to_string(Cs, B1, B2) + end. + +mime_decode_list_to_string([0 | Cs], B1, B2, B3) -> + mime_decode_list_to_string(Cs, B1, B2, B3); +mime_decode_list_to_string([C4 | Cs], B1, B2, B3) -> + case b64d(C4) of + B4 when is_integer(B4) -> + Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4, + Octet1 = Bits4x6 bsr 16, + Octet2 = (Bits4x6 bsr 8) band 16#ff, + Octet3 = Bits4x6 band 16#ff, + [Octet1, Octet2, Octet3 | mime_decode_list_to_string(Cs)]; eq -> - mime_decode_binary_after_eq(Result0, T, true); - Bits when is_integer(Bits) -> + mime_decode_list_to_string_after_eq(Cs, B1, B2, B3); + _ -> mime_decode_list_to_string(Cs, B1, B2, B3) + end. + +mime_decode_list_to_string_after_eq([0 | Cs], B1, B2, B3) -> + mime_decode_list_to_string_after_eq(Cs, B1, B2, B3); +mime_decode_list_to_string_after_eq([C | Cs], B1, B2, B3) -> + case b64d(C) of + B when is_integer(B) -> %% More valid data, skip the eq as invalid - mime_decode_binary(<>, T) + case B3 of + eq -> mime_decode_list_to_string(Cs, B1, B2, B); + _ -> + Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B, + Octet1 = Bits4x6 bsr 16, + Octet2 = (Bits4x6 bsr 8) band 16#ff, + Octet3 = Bits4x6 band 16#ff, + [Octet1, Octet2, Octet3 | mime_decode_list_to_string(Cs)] + end; + _ -> mime_decode_list_to_string_after_eq(Cs, B1, B2, B3) + end; +mime_decode_list_to_string_after_eq([], B1, B2, eq) -> + binary_to_list(<>); +mime_decode_list_to_string_after_eq([], B1, B2, B3) -> + binary_to_list(<>). + +mime_decode_binary_to_string(<<0:8, Cs/bits>>) -> + mime_decode_binary_to_string(Cs); +mime_decode_binary_to_string(<>) -> + case b64d(C1) of + B1 when is_integer(B1) -> mime_decode_binary_to_string(Cs, B1); + _ -> mime_decode_binary_to_string(Cs) % eq is padding end; -mime_decode_binary_after_eq(Result0, <<>>, Eq) -> - %% No more valid data. - case bit_size(Result0) rem 8 of - 0 -> - %% '====' is not uncommon. - Result0; - 4 when Eq -> - %% enforce at least one more '=' only ignoring illegals and spacing - Split = byte_size(Result0) - 1, - <> = Result0, - Result; - 2 -> - %% remove 2 bits - Split = byte_size(Result0) - 1, - <> = Result0, - Result +mime_decode_binary_to_string(<<>>) -> + []. + +mime_decode_binary_to_string(<<0:8, Cs/bits>>, B1) -> + mime_decode_binary_to_string(Cs, B1); +mime_decode_binary_to_string(<>, B1) -> + case b64d(C2) of + B2 when is_integer(B2) -> + mime_decode_binary_to_string(Cs, B1, B2); + _ -> mime_decode_binary_to_string(Cs, B1) % eq is padding + end. + +mime_decode_binary_to_string(<<0:8, Cs/bits>>, B1, B2) -> + mime_decode_binary_to_string(Cs, B1, B2); +mime_decode_binary_to_string(<>, B1, B2) -> + case b64d(C3) of + B3 when is_integer(B3) -> + mime_decode_binary_to_string(Cs, B1, B2, B3); + eq=B3 -> mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3); + _ -> mime_decode_binary_to_string(Cs, B1, B2) + end. + +mime_decode_binary_to_string(<<0:8, Cs/bits>>, B1, B2, B3) -> + mime_decode_binary_to_string(Cs, B1, B2, B3); +mime_decode_binary_to_string(<>, B1, B2, B3) -> + case b64d(C4) of + B4 when is_integer(B4) -> + Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4, + Octet1 = Bits4x6 bsr 16, + Octet2 = (Bits4x6 bsr 8) band 16#ff, + Octet3 = Bits4x6 band 16#ff, + [Octet1, Octet2, Octet3 | mime_decode_binary_to_string(Cs)]; + eq -> + mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3); + _ -> mime_decode_binary_to_string(Cs, B1, B2, B3) end. +mime_decode_binary_to_string_after_eq(<<0:8, Cs/bits>>, B1, B2, B3) -> + mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3); +mime_decode_binary_to_string_after_eq(<>=Cs0, B1, B2, B3) -> + case b64d(C) of + B when is_integer(B) -> + %% More valid data, skip the eq as invalid + case B3 of + eq -> mime_decode_binary_to_string(Cs, B1, B2, B); + _ -> mime_decode_binary_to_string(Cs0, B1, B2, B3) + end; + _ -> mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3) + end; +mime_decode_binary_to_string_after_eq(<<>>, B1, B2, eq) -> + binary_to_list(<>); +mime_decode_binary_to_string_after_eq(<<>>, B1, B2, B3) -> + binary_to_list(<>). + decode_list([C1 | Cs], A) -> - case element(C1, ?DECODE_MAP) of + case b64d(C1) of ws -> decode_list(Cs, A); B1 -> decode_list(Cs, A, B1) end; @@ -281,122 +409,167 @@ decode_list([], A) -> A. decode_list([C2 | Cs], A, B1) -> - case element(C2, ?DECODE_MAP) of + case b64d(C2) of ws -> decode_list(Cs, A, B1); B2 -> decode_list(Cs, A, B1, B2) end. decode_list([C3 | Cs], A, B1, B2) -> - case element(C3, ?DECODE_MAP) of + case b64d(C3) of ws -> decode_list(Cs, A, B1, B2); B3 -> decode_list(Cs, A, B1, B2, B3) end. decode_list([C4 | Cs], A, B1, B2, B3) -> - case element(C4, ?DECODE_MAP) of + case b64d(C4) of ws -> decode_list(Cs, A, B1, B2, B3); eq when B3 =:= eq -> only_ws(Cs, <>); eq -> only_ws(Cs, <>); B4 -> decode_list(Cs, <>) end. +decode_binary(<>, A) -> + case b64d(C1) of + ws -> decode_binary(Cs, A); + B1 -> decode_binary(Cs, A, B1) + end; +decode_binary(<<>>, A) -> + A. + +decode_binary(<>, A, B1) -> + case b64d(C2) of + ws -> decode_binary(Cs, A, B1); + B2 -> decode_binary(Cs, A, B1, B2) + end. + +decode_binary(<>, A, B1, B2) -> + case b64d(C3) of + ws -> decode_binary(Cs, A, B1, B2); + B3 -> decode_binary(Cs, A, B1, B2, B3) + end. + +decode_binary(<>, A, B1, B2, B3) -> + case b64d(C4) of + ws -> decode_binary(Cs, A, B1, B2, B3); + eq when B3 =:= eq -> only_ws_binary(Cs, <>); + eq -> only_ws_binary(Cs, <>); + B4 -> decode_binary(Cs, <>) + end. + +decode_list_to_string([C1 | Cs]) -> + case b64d(C1) of + ws -> decode_list_to_string(Cs); + B1 -> decode_list_to_string(Cs, B1) + end; +decode_list_to_string([]) -> + []. + +decode_list_to_string([C2 | Cs], B1) -> + case b64d(C2) of + ws -> decode_list_to_string(Cs, B1); + B2 -> decode_list_to_string(Cs, B1, B2) + end. + +decode_list_to_string([C3 | Cs], B1, B2) -> + case b64d(C3) of + ws -> decode_list_to_string(Cs, B1, B2); + B3 -> decode_list_to_string(Cs, B1, B2, B3) + end. + +decode_list_to_string([C4 | Cs], B1, B2, B3) -> + case b64d(C4) of + ws -> + decode_list_to_string(Cs, B1, B2, B3); + eq when B3 =:= eq -> + only_ws(Cs, binary_to_list(<>)); + eq -> + only_ws(Cs, binary_to_list(<>)); + B4 -> + Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4, + Octet1 = Bits4x6 bsr 16, + Octet2 = (Bits4x6 bsr 8) band 16#ff, + Octet3 = Bits4x6 band 16#ff, + [Octet1, Octet2, Octet3 | decode_list_to_string(Cs)] + end. + only_ws([], A) -> A; only_ws([C | Cs], A) -> - case element(C, ?DECODE_MAP) of + case b64d(C) of ws -> only_ws(Cs, A); _ -> erlang:error(function_clause) end. -decode([], A) -> A; -decode([$=,$=,C2,C1|Cs], A) -> - Bits2x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12), - Octet1 = Bits2x6 bsr 16, - decode(Cs, [Octet1|A]); -decode([$=,C3,C2,C1|Cs], A) -> - Bits3x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12) - bor (b64d(C3) bsl 6), - Octet1 = Bits3x6 bsr 16, - Octet2 = (Bits3x6 bsr 8) band 16#ff, - decode(Cs, [Octet1,Octet2|A]); -decode([C4,C3,C2,C1| Cs], A) -> - Bits4x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12) - bor (b64d(C3) bsl 6) bor b64d(C4), - Octet1 = Bits4x6 bsr 16, - Octet2 = (Bits4x6 bsr 8) band 16#ff, - Octet3 = Bits4x6 band 16#ff, - decode(Cs, [Octet1,Octet2,Octet3|A]). +decode_binary_to_string(<>) -> + case b64d(C1) of + ws -> decode_binary_to_string(Cs); + B1 -> decode_binary_to_string(Cs, B1) + end; +decode_binary_to_string(<<>>) -> + []. -%%%======================================================================== -%%% Internal functions -%%%======================================================================== +decode_binary_to_string(<>, B1) -> + case b64d(C2) of + ws -> decode_binary_to_string(Cs, B1); + B2 -> decode_binary_to_string(Cs, B1, B2) + end. -strip_spaces([], A) -> A; -strip_spaces([$\s|Cs], A) -> strip_spaces(Cs, A); -strip_spaces([$\t|Cs], A) -> strip_spaces(Cs, A); -strip_spaces([$\r|Cs], A) -> strip_spaces(Cs, A); -strip_spaces([$\n|Cs], A) -> strip_spaces(Cs, A); -strip_spaces([C|Cs], A) -> strip_spaces(Cs, [C | A]). +decode_binary_to_string(<>, B1, B2) -> + case b64d(C3) of + ws -> decode_binary_to_string(Cs, B1, B2); + B3 -> decode_binary_to_string(Cs, B1, B2, B3) + end. -%% Skipping pad character if not at end of string. Also liberal about -%% excess padding and skipping of other illegal (non-base64 alphabet) -%% characters. See section 3.3 of RFC4648 -strip_illegal([], A, _Cnt) -> - A; -strip_illegal([0|Cs], A, Cnt) -> - strip_illegal(Cs, A, Cnt); -strip_illegal([C|Cs], A, Cnt) -> - case element(C, ?DECODE_MAP) of - bad -> - strip_illegal(Cs, A, Cnt); - ws -> - strip_illegal(Cs, A, Cnt); - eq -> - case {tail_contains_more(Cs, false), Cnt rem 4} of - {{[], _}, 0} -> - A; %% Ignore extra = - {{[], true}, 2} -> - [$=|[$=|A]]; %% 'XX==' - {{[], _}, 3} -> - [$=|A]; %% 'XXX=' - {{[H|T], _}, _} -> - %% more data, skip equals - strip_illegal(T, [H|A], Cnt+1) - end; - _ -> - strip_illegal(Cs, [C|A], Cnt+1) +decode_binary_to_string(<>, B1, B2, B3) -> + case b64d(C4) of + ws -> decode_binary_to_string(Cs, B1, B2, B3); + eq when B3 =:= eq -> + only_ws_binary(Cs, binary_to_list(<>)); + eq -> + only_ws_binary(Cs, binary_to_list(<>)); + B4 -> + Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4, + Octet1 = Bits4x6 bsr 16, + Octet2 = (Bits4x6 bsr 8) band 16#ff, + Octet3 = Bits4x6 band 16#ff, + [Octet1, Octet2, Octet3 | decode_binary_to_string(Cs)] end. -%% Search the tail for more valid data and remember if we saw -%% another equals along the way. -tail_contains_more([], Eq) -> - {[], Eq}; -tail_contains_more(<<>>, Eq) -> - {<<>>, Eq}; -tail_contains_more([C|T]=More, Eq) -> - case element(C, ?DECODE_MAP) of - bad -> - tail_contains_more(T, Eq); - ws -> - tail_contains_more(T, Eq); - eq -> - tail_contains_more(T, true); - _ -> - {More, Eq} - end; -tail_contains_more(<> =More, Eq) -> - case element(C, ?DECODE_MAP) of - bad -> - tail_contains_more(T, Eq); - ws -> - tail_contains_more(T, Eq); - eq -> - tail_contains_more(T, true); - _ -> - {More, Eq} +only_ws_binary(<<>>, A) -> + A; +only_ws_binary(<>, A) -> + case b64d(C) of + ws -> only_ws_binary(Cs, A); + _ -> erlang:error(function_clause) end. - + +%%%======================================================================== +%%% Internal functions +%%%======================================================================== + %% accessors +-compile({inline, [{b64d, 1}]}). +%% One-based decode map. +b64d(X) -> + element(X, + {bad,bad,bad,bad,bad,bad,bad,bad,ws,ws,bad,bad,ws,bad,bad, %1-15 + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, %16-31 + ws,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,62,bad,bad,bad,63, %32-47 + 52,53,54,55,56,57,58,59,60,61,bad,bad,bad,eq,bad,bad, %48-63 + bad,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, + 15,16,17,18,19,20,21,22,23,24,25,bad,bad,bad,bad,bad, + bad,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, + 41,42,43,44,45,46,47,48,49,50,51,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}). + +-compile({inline, [{b64e, 1}]}). b64e(X) -> element(X+1, {$A, $B, $C, $D, $E, $F, $G, $H, $I, $J, $K, $L, $M, $N, @@ -404,9 +577,3 @@ b64e(X) -> $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, $q, $r, $s, $t, $u, $v, $w, $x, $y, $z, $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $+, $/}). - - -b64d(X) -> - b64d_ok(element(X, ?DECODE_MAP)). - -b64d_ok(I) when is_integer(I) -> I. diff --git a/lib/stdlib/test/base64_SUITE.erl b/lib/stdlib/test/base64_SUITE.erl index 48b3f5f959..1715b2ebe6 100644 --- a/lib/stdlib/test/base64_SUITE.erl +++ b/lib/stdlib/test/base64_SUITE.erl @@ -119,44 +119,51 @@ illegal(Config) when is_list(Config) -> ok. %%------------------------------------------------------------------------- %% mime_decode and mime_decode_to_string have different implementations -%% so test both with the same input separately. Both functions have -%% the same implementation for binary/string arguments. +%% so test both with the same input separately. %% %% Test base64:mime_decode/1. mime_decode(Config) when is_list(Config) -> + MimeDecode = fun(In) -> + Out = base64:mime_decode(In), + Out = base64:mime_decode(binary_to_list(In)) + end, %% Test correct padding - <<"one">> = base64:mime_decode(<<"b25l">>), - <<"on">> = base64:mime_decode(<<"b24=">>), - <<"o">> = base64:mime_decode(<<"bw==">>), + <<"one">> = MimeDecode(<<"b25l">>), + <<"on">> = MimeDecode(<<"b24=">>), + <<"o">> = MimeDecode(<<"bw==">>), %% Test 1 extra padding - <<"one">> = base64:mime_decode(<<"b25l= =">>), - <<"on">> = base64:mime_decode(<<"b24== =">>), - <<"o">> = base64:mime_decode(<<"bw=== =">>), + <<"one">> = MimeDecode(<<"b25l= =">>), + <<"on">> = MimeDecode(<<"b24== =">>), + <<"o">> = MimeDecode(<<"bw=== =">>), %% Test 2 extra padding - <<"one">> = base64:mime_decode(<<"b25l===">>), - <<"on">> = base64:mime_decode(<<"b24====">>), - <<"o">> = base64:mime_decode(<<"bw=====">>), + <<"one">> = MimeDecode(<<"b25l===">>), + <<"on">> = MimeDecode(<<"b24====">>), + <<"o">> = MimeDecode(<<"bw=====">>), %% Test misc embedded padding - <<"one">> = base64:mime_decode(<<"b2=5l===">>), - <<"on">> = base64:mime_decode(<<"b=24====">>), - <<"o">> = base64:mime_decode(<<"b=w=====">>), + <<"one">> = MimeDecode(<<"b2=5l===">>), + <<"on">> = MimeDecode(<<"b=24====">>), + <<"o">> = MimeDecode(<<"b=w=====">>), %% Test misc white space and illegals with embedded padding - <<"one">> = base64:mime_decode(<<" b~2=\r\n5()l===">>), - <<"on">> = base64:mime_decode(<<"\tb =2\"¤4=¤= ==">>), - <<"o">> = base64:mime_decode(<<"\nb=w=====">>), + <<"one">> = MimeDecode(<<" b~2=\r\n5()l===">>), + <<"on">> = MimeDecode(<<"\tb =2\"¤4=¤= ==">>), + <<"o">> = MimeDecode(<<"\nb=w=====">>), %% Two pads <<"Aladdin:open sesame">> = - base64:mime_decode("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="), + MimeDecode(<<"QWxhZGRpbjpvc()GVuIHNlc2FtZQ==">>), %% One pad to ignore, followed by more text - <<"Hello World!!">> = base64:mime_decode(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), + <<"Hello World!!">> = MimeDecode(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), %% No pad <<"Aladdin:open sesam">> = - base64:mime_decode("QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft"), + MimeDecode(<<"QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft">>), %% Encoded base 64 strings may be divided by non base 64 chars. %% In this cases whitespaces. <<"0123456789!@#0^&*();:<>,. []{}">> = - base64:mime_decode( + MimeDecode( <<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>), + %% Zeroes + <<"012">> = MimeDecode(<<"\000M\000D\000E\000y=\000">>), + <<"o">> = MimeDecode(<<"bw==\000">>), + <<"o">> = MimeDecode(<<"bw=\000=">>), ok. %%------------------------------------------------------------------------- @@ -165,39 +172,48 @@ mime_decode(Config) when is_list(Config) -> %% Test base64:mime_decode_to_string/1. mime_decode_to_string(Config) when is_list(Config) -> + MimeDecodeToString = + fun(In) -> + Out = base64:mime_decode_to_string(In), + Out = base64:mime_decode_to_string(binary_to_list(In)) + end, %% Test correct padding - "one" = base64:mime_decode_to_string(<<"b25l">>), - "on" = base64:mime_decode_to_string(<<"b24=">>), - "o" = base64:mime_decode_to_string(<<"bw==">>), + "one" = MimeDecodeToString(<<"b25l">>), + "on" = MimeDecodeToString(<<"b24=">>), + "o" = MimeDecodeToString(<<"bw==">>), %% Test 1 extra padding - "one" = base64:mime_decode_to_string(<<"b25l= =">>), - "on" = base64:mime_decode_to_string(<<"b24== =">>), - "o" = base64:mime_decode_to_string(<<"bw=== =">>), + "one" = MimeDecodeToString(<<"b25l= =">>), + "on" = MimeDecodeToString(<<"b24== =">>), + "o" = MimeDecodeToString(<<"bw=== =">>), %% Test 2 extra padding - "one" = base64:mime_decode_to_string(<<"b25l===">>), - "on" = base64:mime_decode_to_string(<<"b24====">>), - "o" = base64:mime_decode_to_string(<<"bw=====">>), + "one" = MimeDecodeToString(<<"b25l===">>), + "on" = MimeDecodeToString(<<"b24====">>), + "o" = MimeDecodeToString(<<"bw=====">>), %% Test misc embedded padding - "one" = base64:mime_decode_to_string(<<"b2=5l===">>), - "on" = base64:mime_decode_to_string(<<"b=24====">>), - "o" = base64:mime_decode_to_string(<<"b=w=====">>), + "one" = MimeDecodeToString(<<"b2=5l===">>), + "on" = MimeDecodeToString(<<"b=24====">>), + "o" = MimeDecodeToString(<<"b=w=====">>), %% Test misc white space and illegals with embedded padding - "one" = base64:mime_decode_to_string(<<" b~2=\r\n5()l===">>), - "on" = base64:mime_decode_to_string(<<"\tb =2\"¤4=¤= ==">>), - "o" = base64:mime_decode_to_string(<<"\nb=w=====">>), + "one" = MimeDecodeToString(<<" b~2=\r\n5()l===">>), + "on" = MimeDecodeToString(<<"\tb =2\"¤4=¤= ==">>), + "o" = MimeDecodeToString(<<"\nb=w=====">>), %% Two pads "Aladdin:open sesame" = - base64:mime_decode_to_string("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="), + MimeDecodeToString(<<"QWxhZGRpbjpvc()GVuIHNlc2FtZQ==">>), %% One pad to ignore, followed by more text - "Hello World!!" = base64:mime_decode_to_string(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), + "Hello World!!" = MimeDecodeToString(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), %% No pad "Aladdin:open sesam" = - base64:mime_decode_to_string("QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft"), + MimeDecodeToString(<<"QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft">>), %% Encoded base 64 strings may be divided by non base 64 chars. %% In this cases whitespaces. "0123456789!@#0^&*();:<>,. []{}" = - base64:mime_decode_to_string( + MimeDecodeToString( <<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>), + %% Zeroes + "012" = MimeDecodeToString(<<"\000M\000D\000E\000y=\000">>), + "o" = MimeDecodeToString(<<"bw==\000">>), + "o" = MimeDecodeToString(<<"bw=\000=">>), ok. %%------------------------------------------------------------------------- -- cgit v1.2.3 From fc80770ba7645d71a159c807c6aba7956a2eb394 Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Tue, 7 Nov 2017 17:22:16 +0100 Subject: stdlib: Use binary_to_list in base64 when it is faster It is sometimes faster to use binary_to_list on input, at least when not considering time for garbage collections. The three functions all return a list, and the temporary list created by binary_to_list should be reclaimed very fast. The implementation from before is thus kept. --- lib/stdlib/src/base64.erl | 134 ++++------------------------------------------ 1 file changed, 11 insertions(+), 123 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl index 6311f3c86b..6ff6020e53 100644 --- a/lib/stdlib/src/base64.erl +++ b/lib/stdlib/src/base64.erl @@ -34,7 +34,7 @@ Base64String :: ascii_string(). encode_to_string(Bin) when is_binary(Bin) -> - encode_binary_to_string(Bin); + encode_to_string(binary_to_list(Bin)); encode_to_string(List) when is_list(List) -> encode_list_to_string(List). @@ -47,22 +47,6 @@ encode(Bin) when is_binary(Bin) -> encode(List) when is_list(List) -> encode_list(List, <<>>). -encode_binary_to_string(<<>>) -> - []; -encode_binary_to_string(<>) -> - [b64e(B1 bsr 2), - b64e((B1 band 3) bsl 4), $=, $=]; -encode_binary_to_string(<>) -> - [b64e(B1 bsr 2), - b64e(((B1 band 3) bsl 4) bor (B2 bsr 4)), - b64e((B2 band 15) bsl 2), $=]; -encode_binary_to_string(<>) -> - BB = (B1 bsl 16) bor (B2 bsl 8) bor B3, - [b64e(BB bsr 18), - b64e((BB bsr 12) band 63), - b64e((BB bsr 6) band 63), - b64e(BB band 63) | encode_binary_to_string(Ls)]. - encode_list_to_string([]) -> []; encode_list_to_string([B1]) -> @@ -141,7 +125,7 @@ mime_decode(List) when is_list(List) -> DataString :: ascii_string(). decode_to_string(Bin) when is_binary(Bin) -> - decode_binary_to_string(Bin); + decode_to_string(binary_to_list(Bin)); decode_to_string(List) when is_list(List) -> decode_list_to_string(List). @@ -150,7 +134,7 @@ decode_to_string(List) when is_list(List) -> DataString :: ascii_string(). mime_decode_to_string(Bin) when is_binary(Bin) -> - mime_decode_binary_to_string(Bin); + mime_decode_to_string(binary_to_list(Bin)); mime_decode_to_string(List) when is_list(List) -> mime_decode_list_to_string(List). @@ -339,67 +323,6 @@ mime_decode_list_to_string_after_eq([], B1, B2, eq) -> mime_decode_list_to_string_after_eq([], B1, B2, B3) -> binary_to_list(<>). -mime_decode_binary_to_string(<<0:8, Cs/bits>>) -> - mime_decode_binary_to_string(Cs); -mime_decode_binary_to_string(<>) -> - case b64d(C1) of - B1 when is_integer(B1) -> mime_decode_binary_to_string(Cs, B1); - _ -> mime_decode_binary_to_string(Cs) % eq is padding - end; -mime_decode_binary_to_string(<<>>) -> - []. - -mime_decode_binary_to_string(<<0:8, Cs/bits>>, B1) -> - mime_decode_binary_to_string(Cs, B1); -mime_decode_binary_to_string(<>, B1) -> - case b64d(C2) of - B2 when is_integer(B2) -> - mime_decode_binary_to_string(Cs, B1, B2); - _ -> mime_decode_binary_to_string(Cs, B1) % eq is padding - end. - -mime_decode_binary_to_string(<<0:8, Cs/bits>>, B1, B2) -> - mime_decode_binary_to_string(Cs, B1, B2); -mime_decode_binary_to_string(<>, B1, B2) -> - case b64d(C3) of - B3 when is_integer(B3) -> - mime_decode_binary_to_string(Cs, B1, B2, B3); - eq=B3 -> mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3); - _ -> mime_decode_binary_to_string(Cs, B1, B2) - end. - -mime_decode_binary_to_string(<<0:8, Cs/bits>>, B1, B2, B3) -> - mime_decode_binary_to_string(Cs, B1, B2, B3); -mime_decode_binary_to_string(<>, B1, B2, B3) -> - case b64d(C4) of - B4 when is_integer(B4) -> - Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4, - Octet1 = Bits4x6 bsr 16, - Octet2 = (Bits4x6 bsr 8) band 16#ff, - Octet3 = Bits4x6 band 16#ff, - [Octet1, Octet2, Octet3 | mime_decode_binary_to_string(Cs)]; - eq -> - mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3); - _ -> mime_decode_binary_to_string(Cs, B1, B2, B3) - end. - -mime_decode_binary_to_string_after_eq(<<0:8, Cs/bits>>, B1, B2, B3) -> - mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3); -mime_decode_binary_to_string_after_eq(<>=Cs0, B1, B2, B3) -> - case b64d(C) of - B when is_integer(B) -> - %% More valid data, skip the eq as invalid - case B3 of - eq -> mime_decode_binary_to_string(Cs, B1, B2, B); - _ -> mime_decode_binary_to_string(Cs0, B1, B2, B3) - end; - _ -> mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3) - end; -mime_decode_binary_to_string_after_eq(<<>>, B1, B2, eq) -> - binary_to_list(<>); -mime_decode_binary_to_string_after_eq(<<>>, B1, B2, B3) -> - binary_to_list(<>). - decode_list([C1 | Cs], A) -> case b64d(C1) of ws -> decode_list(Cs, A); @@ -456,6 +379,14 @@ decode_binary(<>, A, B1, B2, B3) -> B4 -> decode_binary(Cs, <>) end. +only_ws_binary(<<>>, A) -> + A; +only_ws_binary(<>, A) -> + case b64d(C) of + ws -> only_ws_binary(Cs, A); + _ -> erlang:error(function_clause) + end. + decode_list_to_string([C1 | Cs]) -> case b64d(C1) of ws -> decode_list_to_string(Cs); @@ -500,49 +431,6 @@ only_ws([C | Cs], A) -> _ -> erlang:error(function_clause) end. -decode_binary_to_string(<>) -> - case b64d(C1) of - ws -> decode_binary_to_string(Cs); - B1 -> decode_binary_to_string(Cs, B1) - end; -decode_binary_to_string(<<>>) -> - []. - -decode_binary_to_string(<>, B1) -> - case b64d(C2) of - ws -> decode_binary_to_string(Cs, B1); - B2 -> decode_binary_to_string(Cs, B1, B2) - end. - -decode_binary_to_string(<>, B1, B2) -> - case b64d(C3) of - ws -> decode_binary_to_string(Cs, B1, B2); - B3 -> decode_binary_to_string(Cs, B1, B2, B3) - end. - -decode_binary_to_string(<>, B1, B2, B3) -> - case b64d(C4) of - ws -> decode_binary_to_string(Cs, B1, B2, B3); - eq when B3 =:= eq -> - only_ws_binary(Cs, binary_to_list(<>)); - eq -> - only_ws_binary(Cs, binary_to_list(<>)); - B4 -> - Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4, - Octet1 = Bits4x6 bsr 16, - Octet2 = (Bits4x6 bsr 8) band 16#ff, - Octet3 = Bits4x6 band 16#ff, - [Octet1, Octet2, Octet3 | decode_binary_to_string(Cs)] - end. - -only_ws_binary(<<>>, A) -> - A; -only_ws_binary(<>, A) -> - case b64d(C) of - ws -> only_ws_binary(Cs, A); - _ -> erlang:error(function_clause) - end. - %%%======================================================================== %%% Internal functions %%%======================================================================== -- cgit v1.2.3 From f3756139b99a77a29dd7325d0b2d706eed8a881f Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Tue, 7 Nov 2017 17:38:39 +0100 Subject: stdlib: Minor optimization of base64 Using /bits instead of /binary is faster when constructing binaries. --- lib/stdlib/src/base64.erl | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl index 6ff6020e53..b4ff7b037f 100644 --- a/lib/stdlib/src/base64.erl +++ b/lib/stdlib/src/base64.erl @@ -66,15 +66,15 @@ encode_list_to_string([B1,B2,B3|Ls]) -> encode_binary(<<>>, A) -> A; encode_binary(<>, A) -> - <>; + <>; encode_binary(<>, A) -> - <>; encode_binary(<>, A) -> BB = (B1 bsl 16) bor (B2 bsl 8) bor B3, encode_binary(Ls, - <>). @@ -82,15 +82,15 @@ encode_binary(<>, A) -> encode_list([], A) -> A; encode_list([B1], A) -> - <>; + <>; encode_list([B1,B2], A) -> - <>; encode_list([B1,B2,B3|Ls], A) -> BB = (B1 bsl 16) bor (B2 bsl 8) bor B3, encode_list(Ls, - <>). @@ -195,9 +195,9 @@ mime_decode_list_after_eq([C | Cs], A, B1, B2, B3) -> _ -> mime_decode_list_after_eq(Cs, A, B1, B2, B3) end; mime_decode_list_after_eq([], A, B1, B2, eq) -> - <>; + <>; mime_decode_list_after_eq([], A, B1, B2, B3) -> - <>. + <>. mime_decode_binary(<<0:8, Cs/bits>>, A) -> mime_decode_binary(Cs, A); @@ -253,9 +253,9 @@ mime_decode_binary_after_eq(<>, A, B1, B2, B3) -> _ -> mime_decode_binary_after_eq(Cs, A, B1, B2, B3) end; mime_decode_binary_after_eq(<<>>, A, B1, B2, eq) -> - <>; + <>; mime_decode_binary_after_eq(<<>>, A, B1, B2, B3) -> - <>. + <>. mime_decode_list_to_string([0 | Cs]) -> mime_decode_list_to_string(Cs); @@ -346,9 +346,9 @@ decode_list([C3 | Cs], A, B1, B2) -> decode_list([C4 | Cs], A, B1, B2, B3) -> case b64d(C4) of ws -> decode_list(Cs, A, B1, B2, B3); - eq when B3 =:= eq -> only_ws(Cs, <>); - eq -> only_ws(Cs, <>); - B4 -> decode_list(Cs, <>) + eq when B3 =:= eq -> only_ws(Cs, <>); + eq -> only_ws(Cs, <>); + B4 -> decode_list(Cs, <>) end. decode_binary(<>, A) -> @@ -374,9 +374,9 @@ decode_binary(<>, A, B1, B2) -> decode_binary(<>, A, B1, B2, B3) -> case b64d(C4) of ws -> decode_binary(Cs, A, B1, B2, B3); - eq when B3 =:= eq -> only_ws_binary(Cs, <>); - eq -> only_ws_binary(Cs, <>); - B4 -> decode_binary(Cs, <>) + eq when B3 =:= eq -> only_ws_binary(Cs, <>); + eq -> only_ws_binary(Cs, <>); + B4 -> decode_binary(Cs, <>) end. only_ws_binary(<<>>, A) -> -- cgit v1.2.3 From 2ead5d429fe87ffedf0134d918c3b404e9fa70fe Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Wed, 8 Nov 2017 12:41:23 +0100 Subject: stdlib: Do not check base64 input more than needed Often the decode functions return a function_clause error, but not always, and the errors have not been consistent between modifications of the base64 module. Now the errors are returned as they happen--no attempt to make them look nice is done. The alternative, to ensure that, for example, {badarg, Culprit} is always returned upon bad input, was deemed pointless. --- lib/stdlib/src/base64.erl | 6 ++---- lib/stdlib/test/base64_SUITE.erl | 13 +++++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl index b4ff7b037f..6ea4147abf 100644 --- a/lib/stdlib/src/base64.erl +++ b/lib/stdlib/src/base64.erl @@ -383,8 +383,7 @@ only_ws_binary(<<>>, A) -> A; only_ws_binary(<>, A) -> case b64d(C) of - ws -> only_ws_binary(Cs, A); - _ -> erlang:error(function_clause) + ws -> only_ws_binary(Cs, A) end. decode_list_to_string([C1 | Cs]) -> @@ -427,8 +426,7 @@ only_ws([], A) -> A; only_ws([C | Cs], A) -> case b64d(C) of - ws -> only_ws(Cs, A); - _ -> erlang:error(function_clause) + ws -> only_ws(Cs, A) end. %%%======================================================================== diff --git a/lib/stdlib/test/base64_SUITE.erl b/lib/stdlib/test/base64_SUITE.erl index 1715b2ebe6..1fc4c3fc0e 100644 --- a/lib/stdlib/test/base64_SUITE.erl +++ b/lib/stdlib/test/base64_SUITE.erl @@ -97,10 +97,9 @@ base64_otp_5635(Config) when is_list(Config) -> <<"===">> = base64:decode(base64:encode("===")), ok. %%------------------------------------------------------------------------- -%% OTP-6279: Guard needed so that function fails in a correct -%% way for faulty input, i.e. function_clause. +%% OTP-6279: Make sure illegal characters are rejected when decoding. base64_otp_6279(Config) when is_list(Config) -> - {'EXIT',{function_clause, _}} = (catch base64:decode("dGVzda==a")), + {'EXIT',_} = (catch base64:decode("dGVzda==a")), ok. %%------------------------------------------------------------------------- %% Encode and decode big binaries. @@ -115,7 +114,13 @@ big(Config) when is_list(Config) -> %%------------------------------------------------------------------------- %% Make sure illegal characters are rejected when decoding. illegal(Config) when is_list(Config) -> - {'EXIT',{function_clause, _}} = (catch base64:decode("()")), + %% A few samples with different error reasons. Nothing can be + %% assumed about the reason for the crash. + {'EXIT',_} = (catch base64:decode("()")), + {'EXIT',_} = (catch base64:decode(<<19:8,20:8,21:8,22:8>>)), + {'EXIT',_} = (catch base64:decode([19,20,21,22])), + {'EXIT',_} = (catch base64:decode_to_string(<<19:8,20:8,21:8,22:8>>)), + {'EXIT',_} = (catch base64:decode_to_string([19,20,21,22])), ok. %%------------------------------------------------------------------------- %% mime_decode and mime_decode_to_string have different implementations -- cgit v1.2.3 From 7530b72480eb68cc9a5d700ee7f22c5069b61514 Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Thu, 30 Nov 2017 16:09:26 +0100 Subject: stdlib: Add base64 benchmarks --- lib/stdlib/test/stdlib_bench_SUITE.erl | 107 ++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 3 deletions(-) (limited to 'lib/stdlib') diff --git a/lib/stdlib/test/stdlib_bench_SUITE.erl b/lib/stdlib/test/stdlib_bench_SUITE.erl index 8670e7029c..2a9981bb9e 100644 --- a/lib/stdlib/test/stdlib_bench_SUITE.erl +++ b/lib/stdlib/test/stdlib_bench_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2012-2016. All Rights Reserved. +%% Copyright Ericsson AB 2012-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -28,13 +28,20 @@ suite() -> [{ct_hooks,[{ts_install_cth,[{nodenames,2}]}]}]. all() -> - [{group,unicode}]. + [{group,unicode}, {group,base64}]. groups() -> [{unicode,[{repeat,5}], [norm_nfc_list, norm_nfc_deep_l, norm_nfc_binary, string_lexemes_list, string_lexemes_binary - ]}]. + ]}, + {base64,[{repeat,5}], + [decode_binary, decode_binary_to_string, + decode_list, decode_list_to_string, + encode_binary, encode_binary_to_string, + encode_list, encode_list_to_string, + mime_binary_decode, mime_binary_decode_to_string, + mime_list_decode, mime_list_decode_to_string]}]. init_per_group(_GroupName, Config) -> Config. @@ -105,3 +112,97 @@ norm_data(Config) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +decode_binary(_Config) -> + test(decode, encoded_binary()). + +decode_binary_to_string(_Config) -> + test(decode_to_string, encoded_binary()). + +decode_list(_Config) -> + test(decode, encoded_list()). + +decode_list_to_string(_Config) -> + test(decode_to_string, encoded_list()). + +encode_binary(_Config) -> + test(encode, binary()). + +encode_binary_to_string(_Config) -> + test(encode_to_string, binary()). + +encode_list(_Config) -> + test(encode, list()). + +encode_list_to_string(_Config) -> + test(encode_to_string, list()). + +mime_binary_decode(_Config) -> + test(mime_decode, encoded_binary()). + +mime_binary_decode_to_string(_Config) -> + test(mime_decode_to_string, encoded_binary()). + +mime_list_decode(_Config) -> + test(mime_decode, encoded_list()). + +mime_list_decode_to_string(_Config) -> + test(mime_decode_to_string, encoded_list()). + +-define(SIZE, 10000). +-define(N, 1000). + +encoded_binary() -> + list_to_binary(encoded_list()). + +encoded_list() -> + L = random_byte_list(round(?SIZE*0.75)), + base64:encode_to_string(L). + +binary() -> + list_to_binary(list()). + +list() -> + random_byte_list(?SIZE). + +test(Func, Data) -> + F = fun() -> loop(?N, Func, Data) end, + {Time, ok} = timer:tc(fun() -> lspawn(F) end), + report_base64(Time). + +loop(0, _F, _D) -> garbage_collect(), ok; +loop(N, F, D) -> + _ = base64:F(D), + loop(N - 1, F, D). + +lspawn(Fun) -> + {Pid, Ref} = spawn_monitor(fun() -> exit(Fun()) end), + receive + {'DOWN', Ref, process, Pid, Rep} -> Rep + end. + +report_base64(Time) -> + Tps = round((?N*1000000)/Time), + ct_event:notify(#event{name = benchmark_data, + data = [{suite, "stdlib_base64"}, + {value, Tps}]}), + Tps. + +%% Copied from base64_SUITE.erl. + +random_byte_list(N) -> + random_byte_list(N, []). + +random_byte_list(0, Acc) -> + Acc; +random_byte_list(N, Acc) -> + random_byte_list(N-1, [rand:uniform(255)|Acc]). + +make_big_binary(N) -> + list_to_binary(mbb(N, [])). + +mbb(N, Acc) when N > 256 -> + B = list_to_binary(lists:seq(0, 255)), + mbb(N - 256, [B | Acc]); +mbb(N, Acc) -> + B = list_to_binary(lists:seq(0, N-1)), + lists:reverse(Acc, B). -- cgit v1.2.3