diff options
author | Hans Bolinder <[email protected]> | 2017-12-07 12:25:53 +0100 |
---|---|---|
committer | Hans Bolinder <[email protected]> | 2017-12-07 12:25:53 +0100 |
commit | e38a9d81b536ecb68bf0c8a409a316aac43937d2 (patch) | |
tree | ee6e658ad785a9e7a21f0d50d55472172458c156 /lib/stdlib | |
parent | b65fe90de1414c2bdec8e7a339a599d111db519d (diff) | |
parent | 7530b72480eb68cc9a5d700ee7f22c5069b61514 (diff) | |
download | otp-e38a9d81b536ecb68bf0c8a409a316aac43937d2.tar.gz otp-e38a9d81b536ecb68bf0c8a409a316aac43937d2.tar.bz2 otp-e38a9d81b536ecb68bf0c8a409a316aac43937d2.zip |
Merge branch 'hasse/stdlib/base64/OTP-14624'
* hasse/stdlib/base64/OTP-14624:
stdlib: Add base64 benchmarks
stdlib: Do not check base64 input more than needed
stdlib: Minor optimization of base64
stdlib: Use binary_to_list in base64 when it is faster
stdlib: Optimize base64 functions
Diffstat (limited to 'lib/stdlib')
-rw-r--r-- | lib/stdlib/src/base64.erl | 597 | ||||
-rw-r--r-- | lib/stdlib/test/base64_SUITE.erl | 109 | ||||
-rw-r--r-- | lib/stdlib/test/stdlib_bench_SUITE.erl | 107 |
3 files changed, 494 insertions, 319 deletions
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl index c8cf6fdffe..6ea4147abf 100644 --- a/lib/stdlib/src/base64.erl +++ b/lib/stdlib/src/base64.erl @@ -24,22 +24,11 @@ -export([encode/1, decode/1, mime_decode/1, encode_to_string/1, decode_to_string/1, mime_decode_to_string/1]). -%%------------------------------------------------------------------------- %% The following type is a subtype of string() for return values %% of (some) functions of this module. -%%------------------------------------------------------------------------- - -type ascii_string() :: [1..255]. -type ascii_binary() :: binary(). -%%------------------------------------------------------------------------- -%% encode_to_string(ASCII) -> Base64String -%% ASCII - string() | binary() -%% Base64String - string() -%% -%% Description: Encodes a plain ASCII string (or binary) into base64. -%%------------------------------------------------------------------------- - -spec encode_to_string(Data) -> Base64String when Data :: ascii_string() | ascii_binary(), Base64String :: ascii_string(). @@ -47,66 +36,67 @@ encode_to_string(Bin) when is_binary(Bin) -> encode_to_string(binary_to_list(Bin)); encode_to_string(List) when is_list(List) -> - encode_l(List). - -%%------------------------------------------------------------------------- -%% encode(ASCII) -> Base64 -%% ASCII - string() | binary() -%% Base64 - binary() -%% -%% Description: Encodes a plain ASCII string (or binary) into base64. -%%------------------------------------------------------------------------- + encode_list_to_string(List). -spec encode(Data) -> Base64 when Data :: ascii_string() | ascii_binary(), Base64 :: ascii_binary(). encode(Bin) when is_binary(Bin) -> - encode_binary(Bin); + encode_binary(Bin, <<>>); encode(List) when is_list(List) -> - list_to_binary(encode_l(List)). - --spec encode_l(ascii_string()) -> ascii_string(). + encode_list(List, <<>>). -encode_l([]) -> +encode_list_to_string([]) -> []; -encode_l([A]) -> - [b64e(A bsr 2), - b64e((A band 3) bsl 4), $=, $=]; -encode_l([A,B]) -> - [b64e(A bsr 2), - b64e(((A band 3) bsl 4) bor (B bsr 4)), - b64e((B band 15) bsl 2), $=]; -encode_l([A,B,C|Ls]) -> - BB = (A bsl 16) bor (B bsl 8) bor C, +encode_list_to_string([B1]) -> + [b64e(B1 bsr 2), + b64e((B1 band 3) bsl 4), $=, $=]; +encode_list_to_string([B1,B2]) -> + [b64e(B1 bsr 2), + b64e(((B1 band 3) bsl 4) bor (B2 bsr 4)), + b64e((B2 band 15) bsl 2), $=]; +encode_list_to_string([B1,B2,B3|Ls]) -> + BB = (B1 bsl 16) bor (B2 bsl 8) bor B3, [b64e(BB bsr 18), b64e((BB bsr 12) band 63), b64e((BB bsr 6) band 63), - b64e(BB band 63) | encode_l(Ls)]. - -encode_binary(Bin) -> - Split = 3*(byte_size(Bin) div 3), - <<Main0:Split/binary,Rest/binary>> = Bin, - Main = << <<(b64e(C)):8>> || <<C:6>> <= Main0 >>, - case Rest of - <<A:6,B:6,C:4>> -> - <<Main/binary,(b64e(A)):8,(b64e(B)):8,(b64e(C bsl 2)):8,$=:8>>; - <<A:6,B:2>> -> - <<Main/binary,(b64e(A)):8,(b64e(B bsl 4)):8,$=:8,$=:8>>; - <<>> -> - Main - end. + b64e(BB band 63) | encode_list_to_string(Ls)]. -%%------------------------------------------------------------------------- -%% mime_decode(Base64) -> ASCII -%% decode(Base64) -> ASCII -%% Base64 - string() | binary() -%% ASCII - binary() -%% -%% Description: Decodes an base64 encoded string to plain ASCII. -%% mime_decode strips away all characters not Base64 before converting, -%% whereas decode crashes if an illegal character is found -%%------------------------------------------------------------------------- +encode_binary(<<>>, A) -> + A; +encode_binary(<<B1:8>>, A) -> + <<A/bits,(b64e(B1 bsr 2)):8,(b64e((B1 band 3) bsl 4)):8,$=:8,$=:8>>; +encode_binary(<<B1:8, B2:8>>, A) -> + <<A/bits,(b64e(B1 bsr 2)):8, + (b64e(((B1 band 3) bsl 4) bor (B2 bsr 4))):8, + (b64e((B2 band 15) bsl 2)):8, $=:8>>; +encode_binary(<<B1:8, B2:8, B3:8, Ls/bits>>, A) -> + BB = (B1 bsl 16) bor (B2 bsl 8) bor B3, + encode_binary(Ls, + <<A/bits,(b64e(BB bsr 18)):8, + (b64e((BB bsr 12) band 63)):8, + (b64e((BB bsr 6) band 63)):8, + (b64e(BB band 63)):8>>). + +encode_list([], A) -> + A; +encode_list([B1], A) -> + <<A/bits,(b64e(B1 bsr 2)):8,(b64e((B1 band 3) bsl 4)):8,$=:8,$=:8>>; +encode_list([B1,B2], A) -> + <<A/bits,(b64e(B1 bsr 2)):8, + (b64e(((B1 band 3) bsl 4) bor (B2 bsr 4))):8, + (b64e((B2 band 15) bsl 2)):8, $=:8>>; +encode_list([B1,B2,B3|Ls], A) -> + BB = (B1 bsl 16) bor (B2 bsl 8) bor B3, + encode_list(Ls, + <<A/bits,(b64e(BB bsr 18)):8, + (b64e((BB bsr 12) band 63)):8, + (b64e((BB bsr 6) band 63)):8, + (b64e(BB band 63)):8>>). + +%% mime_decode strips away all characters not Base64 before +%% converting, whereas decode crashes if an illegal character is found -spec decode(Base64) -> Data when Base64 :: ascii_string() | ascii_binary(), @@ -122,32 +112,13 @@ decode(List) when is_list(List) -> Data :: ascii_binary(). mime_decode(Bin) when is_binary(Bin) -> - mime_decode_binary(<<>>, Bin); + mime_decode_binary(Bin, <<>>); mime_decode(List) when is_list(List) -> - mime_decode(list_to_binary(List)). + mime_decode_list(List, <<>>). --spec decode_l(ascii_string()) -> ascii_string(). - -decode_l(List) -> - L = strip_spaces(List, []), - decode(L, []). - --spec mime_decode_l(ascii_string()) -> ascii_string(). - -mime_decode_l(List) -> - L = strip_illegal(List, [], 0), - decode(L, []). - -%%------------------------------------------------------------------------- -%% mime_decode_to_string(Base64) -> ASCII -%% decode_to_string(Base64) -> ASCII -%% Base64 - string() | binary() -%% ASCII - binary() -%% -%% Description: Decodes an base64 encoded string to plain ASCII. -%% mime_decode strips away all characters not Base64 before converting, -%% whereas decode crashes if an illegal character is found -%%------------------------------------------------------------------------- +%% mime_decode_to_string strips away all characters not Base64 before +%% converting, whereas decode_to_string crashes if an illegal +%% character is found -spec decode_to_string(Base64) -> DataString when Base64 :: ascii_string() | ascii_binary(), @@ -156,7 +127,7 @@ mime_decode_l(List) -> decode_to_string(Bin) when is_binary(Bin) -> decode_to_string(binary_to_list(Bin)); decode_to_string(List) when is_list(List) -> - decode_l(List). + decode_list_to_string(List). -spec mime_decode_to_string(Base64) -> DataString when Base64 :: ascii_string() | ascii_binary(), @@ -165,115 +136,195 @@ decode_to_string(List) when is_list(List) -> mime_decode_to_string(Bin) when is_binary(Bin) -> mime_decode_to_string(binary_to_list(Bin)); mime_decode_to_string(List) when is_list(List) -> - mime_decode_l(List). - -%% One-based decode map. --define(DECODE_MAP, - {bad,bad,bad,bad,bad,bad,bad,bad,ws,ws,bad,bad,ws,bad,bad, %1-15 - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, %16-31 - ws,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,62,bad,bad,bad,63, %32-47 - 52,53,54,55,56,57,58,59,60,61,bad,bad,bad,eq,bad,bad, %48-63 - bad,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, - 15,16,17,18,19,20,21,22,23,24,25,bad,bad,bad,bad,bad, - bad,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, - 41,42,43,44,45,46,47,48,49,50,51,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}). + mime_decode_list_to_string(List). -decode_binary(<<C1:8, Cs/bits>>, A) -> - case element(C1, ?DECODE_MAP) of - ws -> decode_binary(Cs, A); - B1 -> decode_binary(Cs, A, B1) +%% Skipping pad character if not at end of string. Also liberal about +%% excess padding and skipping of other illegal (non-base64 alphabet) +%% characters. See section 3.3 of RFC4648 +mime_decode_list([0 | Cs], A) -> + mime_decode_list(Cs, A); +mime_decode_list([C1 | Cs], A) -> + case b64d(C1) of + B1 when is_integer(B1) -> mime_decode_list(Cs, A, B1); + _ -> mime_decode_list(Cs, A) % eq is padding end; -decode_binary(<<>>, A) -> +mime_decode_list([], A) -> A. -decode_binary(<<C2:8, Cs/bits>>, A, B1) -> - case element(C2, ?DECODE_MAP) of - ws -> decode_binary(Cs, A, B1); - B2 -> decode_binary(Cs, A, B1, B2) +mime_decode_list([0 | Cs], A, B1) -> + mime_decode_list(Cs, A, B1); +mime_decode_list([C2 | Cs], A, B1) -> + case b64d(C2) of + B2 when is_integer(B2) -> + mime_decode_list(Cs, A, B1, B2); + _ -> mime_decode_list(Cs, A, B1) % eq is padding end. -decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) -> - case element(C3, ?DECODE_MAP) of - ws -> decode_binary(Cs, A, B1, B2); - B3 -> decode_binary(Cs, A, B1, B2, B3) +mime_decode_list([0 | Cs], A, B1, B2) -> + mime_decode_list(Cs, A, B1, B2); +mime_decode_list([C3 | Cs], A, B1, B2) -> + case b64d(C3) of + B3 when is_integer(B3) -> + mime_decode_list(Cs, A, B1, B2, B3); + eq=B3 -> + mime_decode_list_after_eq(Cs, A, B1, B2, B3); + _ -> mime_decode_list(Cs, A, B1, B2) end. -decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) -> - case element(C4, ?DECODE_MAP) of - ws -> decode_binary(Cs, A, B1, B2, B3); - eq when B3 =:= eq -> only_ws_binary(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>); - eq -> only_ws_binary(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>); - B4 -> decode_binary(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>) +mime_decode_list([0 | Cs], A, B1, B2, B3) -> + mime_decode_list(Cs, A, B1, B2, B3); +mime_decode_list([C4 | Cs], A, B1, B2, B3) -> + case b64d(C4) of + B4 when is_integer(B4) -> + mime_decode_list(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>); + eq -> + mime_decode_list_after_eq(Cs, A, B1, B2, B3); + _ -> mime_decode_list(Cs, A, B1, B2, B3) end. -only_ws_binary(<<>>, A) -> - A; -only_ws_binary(<<C:8, Cs/bits>>, A) -> - case element(C, ?DECODE_MAP) of - ws -> only_ws_binary(Cs, A); - _ -> erlang:error(function_clause) +mime_decode_list_after_eq([0 | Cs], A, B1, B2, B3) -> + mime_decode_list_after_eq(Cs, A, B1, B2, B3); +mime_decode_list_after_eq([C | Cs], A, B1, B2, B3) -> + case b64d(C) of + B when is_integer(B) -> + %% More valid data, skip the eq as invalid + case B3 of + eq -> mime_decode_list(Cs, A, B1, B2, B); + _ -> mime_decode_list(Cs, <<A/bits,B1:6,B2:6,B3:6,B:6>>) + end; + _ -> mime_decode_list_after_eq(Cs, A, B1, B2, B3) + end; +mime_decode_list_after_eq([], A, B1, B2, eq) -> + <<A/bits,B1:6,(B2 bsr 4):2>>; +mime_decode_list_after_eq([], A, B1, B2, B3) -> + <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>. + +mime_decode_binary(<<0:8, Cs/bits>>, A) -> + mime_decode_binary(Cs, A); +mime_decode_binary(<<C1:8, Cs/bits>>, A) -> + case b64d(C1) of + B1 when is_integer(B1) -> mime_decode_binary(Cs, A, B1); + _ -> mime_decode_binary(Cs, A) % eq is padding + end; +mime_decode_binary(<<>>, A) -> + A. + +mime_decode_binary(<<0:8, Cs/bits>>, A, B1) -> + mime_decode_binary(Cs, A, B1); +mime_decode_binary(<<C2:8, Cs/bits>>, A, B1) -> + case b64d(C2) of + B2 when is_integer(B2) -> + mime_decode_binary(Cs, A, B1, B2); + _ -> mime_decode_binary(Cs, A, B1) % eq is padding end. -%% Skipping pad character if not at end of string. Also liberal about -%% excess padding and skipping of other illegal (non-base64 alphabet) -%% characters. See section 3.3 of RFC4648 -mime_decode_binary(Result, <<0:8,T/bits>>) -> - mime_decode_binary(Result, T); -mime_decode_binary(Result0, <<C:8,T/bits>>) -> - case element(C, ?DECODE_MAP) of - Bits when is_integer(Bits) -> - mime_decode_binary(<<Result0/bits,Bits:6>>, T); +mime_decode_binary(<<0:8, Cs/bits>>, A, B1, B2) -> + mime_decode_binary(Cs, A, B1, B2); +mime_decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) -> + case b64d(C3) of + B3 when is_integer(B3) -> + mime_decode_binary(Cs, A, B1, B2, B3); + eq=B3 -> + mime_decode_binary_after_eq(Cs, A, B1, B2, B3); + _ -> mime_decode_binary(Cs, A, B1, B2) + end. + +mime_decode_binary(<<0:8, Cs/bits>>, A, B1, B2, B3) -> + mime_decode_binary(Cs, A, B1, B2, B3); +mime_decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) -> + case b64d(C4) of + B4 when is_integer(B4) -> + mime_decode_binary(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>); eq -> - mime_decode_binary_after_eq(Result0, T, false); - _ -> - mime_decode_binary(Result0, T) + mime_decode_binary_after_eq(Cs, A, B1, B2, B3); + _ -> mime_decode_binary(Cs, A, B1, B2, B3) + end. + +mime_decode_binary_after_eq(<<0:8, Cs/bits>>, A, B1, B2, B3) -> + mime_decode_binary_after_eq(Cs, A, B1, B2, B3); +mime_decode_binary_after_eq(<<C:8, Cs/bits>>, A, B1, B2, B3) -> + case b64d(C) of + B when is_integer(B) -> + %% More valid data, skip the eq as invalid + case B3 of + eq -> mime_decode_binary(Cs, A, B1, B2, B); + _ -> mime_decode_binary(Cs, <<A/bits,B1:6,B2:6,B3:6,B:6>>) + end; + _ -> mime_decode_binary_after_eq(Cs, A, B1, B2, B3) end; -mime_decode_binary(Result, _) -> - true = is_binary(Result), - Result. - -mime_decode_binary_after_eq(Result, <<0:8,T/bits>>, Eq) -> - mime_decode_binary_after_eq(Result, T, Eq); -mime_decode_binary_after_eq(Result0, <<C:8,T/bits>>, Eq) -> - case element(C, ?DECODE_MAP) of - bad -> - mime_decode_binary_after_eq(Result0, T, Eq); - ws -> - mime_decode_binary_after_eq(Result0, T, Eq); +mime_decode_binary_after_eq(<<>>, A, B1, B2, eq) -> + <<A/bits,B1:6,(B2 bsr 4):2>>; +mime_decode_binary_after_eq(<<>>, A, B1, B2, B3) -> + <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>. + +mime_decode_list_to_string([0 | Cs]) -> + mime_decode_list_to_string(Cs); +mime_decode_list_to_string([C1 | Cs]) -> + case b64d(C1) of + B1 when is_integer(B1) -> mime_decode_list_to_string(Cs, B1); + _ -> mime_decode_list_to_string(Cs) % eq is padding + end; +mime_decode_list_to_string([]) -> + []. + +mime_decode_list_to_string([0 | Cs], B1) -> + mime_decode_list_to_string(Cs, B1); +mime_decode_list_to_string([C2 | Cs], B1) -> + case b64d(C2) of + B2 when is_integer(B2) -> + mime_decode_list_to_string(Cs, B1, B2); + _ -> mime_decode_list_to_string(Cs, B1) % eq is padding + end. + +mime_decode_list_to_string([0 | Cs], B1, B2) -> + mime_decode_list_to_string(Cs, B1, B2); +mime_decode_list_to_string([C3 | Cs], B1, B2) -> + case b64d(C3) of + B3 when is_integer(B3) -> + mime_decode_list_to_string(Cs, B1, B2, B3); + eq=B3 -> mime_decode_list_to_string_after_eq(Cs, B1, B2, B3); + _ -> mime_decode_list_to_string(Cs, B1, B2) + end. + +mime_decode_list_to_string([0 | Cs], B1, B2, B3) -> + mime_decode_list_to_string(Cs, B1, B2, B3); +mime_decode_list_to_string([C4 | Cs], B1, B2, B3) -> + case b64d(C4) of + B4 when is_integer(B4) -> + Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4, + Octet1 = Bits4x6 bsr 16, + Octet2 = (Bits4x6 bsr 8) band 16#ff, + Octet3 = Bits4x6 band 16#ff, + [Octet1, Octet2, Octet3 | mime_decode_list_to_string(Cs)]; eq -> - mime_decode_binary_after_eq(Result0, T, true); - Bits when is_integer(Bits) -> + mime_decode_list_to_string_after_eq(Cs, B1, B2, B3); + _ -> mime_decode_list_to_string(Cs, B1, B2, B3) + end. + +mime_decode_list_to_string_after_eq([0 | Cs], B1, B2, B3) -> + mime_decode_list_to_string_after_eq(Cs, B1, B2, B3); +mime_decode_list_to_string_after_eq([C | Cs], B1, B2, B3) -> + case b64d(C) of + B when is_integer(B) -> %% More valid data, skip the eq as invalid - mime_decode_binary(<<Result0/bits,Bits:6>>, T) + case B3 of + eq -> mime_decode_list_to_string(Cs, B1, B2, B); + _ -> + Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B, + Octet1 = Bits4x6 bsr 16, + Octet2 = (Bits4x6 bsr 8) band 16#ff, + Octet3 = Bits4x6 band 16#ff, + [Octet1, Octet2, Octet3 | mime_decode_list_to_string(Cs)] + end; + _ -> mime_decode_list_to_string_after_eq(Cs, B1, B2, B3) end; -mime_decode_binary_after_eq(Result0, <<>>, Eq) -> - %% No more valid data. - case bit_size(Result0) rem 8 of - 0 -> - %% '====' is not uncommon. - Result0; - 4 when Eq -> - %% enforce at least one more '=' only ignoring illegals and spacing - Split = byte_size(Result0) - 1, - <<Result:Split/bytes,_:4>> = Result0, - Result; - 2 -> - %% remove 2 bits - Split = byte_size(Result0) - 1, - <<Result:Split/bytes,_:2>> = Result0, - Result - end. +mime_decode_list_to_string_after_eq([], B1, B2, eq) -> + binary_to_list(<<B1:6,(B2 bsr 4):2>>); +mime_decode_list_to_string_after_eq([], B1, B2, B3) -> + binary_to_list(<<B1:6,B2:6,(B3 bsr 2):4>>). decode_list([C1 | Cs], A) -> - case element(C1, ?DECODE_MAP) of + case b64d(C1) of ws -> decode_list(Cs, A); B1 -> decode_list(Cs, A, B1) end; @@ -281,122 +332,130 @@ decode_list([], A) -> A. decode_list([C2 | Cs], A, B1) -> - case element(C2, ?DECODE_MAP) of + case b64d(C2) of ws -> decode_list(Cs, A, B1); B2 -> decode_list(Cs, A, B1, B2) end. decode_list([C3 | Cs], A, B1, B2) -> - case element(C3, ?DECODE_MAP) of + case b64d(C3) of ws -> decode_list(Cs, A, B1, B2); B3 -> decode_list(Cs, A, B1, B2, B3) end. decode_list([C4 | Cs], A, B1, B2, B3) -> - case element(C4, ?DECODE_MAP) of + case b64d(C4) of ws -> decode_list(Cs, A, B1, B2, B3); - eq when B3 =:= eq -> only_ws(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>); - eq -> only_ws(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>); - B4 -> decode_list(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>) + eq when B3 =:= eq -> only_ws(Cs, <<A/bits,B1:6,(B2 bsr 4):2>>); + eq -> only_ws(Cs, <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>); + B4 -> decode_list(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>) end. -only_ws([], A) -> - A; -only_ws([C | Cs], A) -> - case element(C, ?DECODE_MAP) of - ws -> only_ws(Cs, A); - _ -> erlang:error(function_clause) - end. +decode_binary(<<C1:8, Cs/bits>>, A) -> + case b64d(C1) of + ws -> decode_binary(Cs, A); + B1 -> decode_binary(Cs, A, B1) + end; +decode_binary(<<>>, A) -> + A. -decode([], A) -> A; -decode([$=,$=,C2,C1|Cs], A) -> - Bits2x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12), - Octet1 = Bits2x6 bsr 16, - decode(Cs, [Octet1|A]); -decode([$=,C3,C2,C1|Cs], A) -> - Bits3x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12) - bor (b64d(C3) bsl 6), - Octet1 = Bits3x6 bsr 16, - Octet2 = (Bits3x6 bsr 8) band 16#ff, - decode(Cs, [Octet1,Octet2|A]); -decode([C4,C3,C2,C1| Cs], A) -> - Bits4x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12) - bor (b64d(C3) bsl 6) bor b64d(C4), - Octet1 = Bits4x6 bsr 16, - Octet2 = (Bits4x6 bsr 8) band 16#ff, - Octet3 = Bits4x6 band 16#ff, - decode(Cs, [Octet1,Octet2,Octet3|A]). +decode_binary(<<C2:8, Cs/bits>>, A, B1) -> + case b64d(C2) of + ws -> decode_binary(Cs, A, B1); + B2 -> decode_binary(Cs, A, B1, B2) + end. -%%%======================================================================== -%%% Internal functions -%%%======================================================================== +decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) -> + case b64d(C3) of + ws -> decode_binary(Cs, A, B1, B2); + B3 -> decode_binary(Cs, A, B1, B2, B3) + end. -strip_spaces([], A) -> A; -strip_spaces([$\s|Cs], A) -> strip_spaces(Cs, A); -strip_spaces([$\t|Cs], A) -> strip_spaces(Cs, A); -strip_spaces([$\r|Cs], A) -> strip_spaces(Cs, A); -strip_spaces([$\n|Cs], A) -> strip_spaces(Cs, A); -strip_spaces([C|Cs], A) -> strip_spaces(Cs, [C | A]). +decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) -> + case b64d(C4) of + ws -> decode_binary(Cs, A, B1, B2, B3); + eq when B3 =:= eq -> only_ws_binary(Cs, <<A/bits,B1:6,(B2 bsr 4):2>>); + eq -> only_ws_binary(Cs, <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>); + B4 -> decode_binary(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>) + end. -%% Skipping pad character if not at end of string. Also liberal about -%% excess padding and skipping of other illegal (non-base64 alphabet) -%% characters. See section 3.3 of RFC4648 -strip_illegal([], A, _Cnt) -> +only_ws_binary(<<>>, A) -> A; -strip_illegal([0|Cs], A, Cnt) -> - strip_illegal(Cs, A, Cnt); -strip_illegal([C|Cs], A, Cnt) -> - case element(C, ?DECODE_MAP) of - bad -> - strip_illegal(Cs, A, Cnt); - ws -> - strip_illegal(Cs, A, Cnt); - eq -> - case {tail_contains_more(Cs, false), Cnt rem 4} of - {{[], _}, 0} -> - A; %% Ignore extra = - {{[], true}, 2} -> - [$=|[$=|A]]; %% 'XX==' - {{[], _}, 3} -> - [$=|A]; %% 'XXX=' - {{[H|T], _}, _} -> - %% more data, skip equals - strip_illegal(T, [H|A], Cnt+1) - end; - _ -> - strip_illegal(Cs, [C|A], Cnt+1) +only_ws_binary(<<C:8, Cs/bits>>, A) -> + case b64d(C) of + ws -> only_ws_binary(Cs, A) end. -%% Search the tail for more valid data and remember if we saw -%% another equals along the way. -tail_contains_more([], Eq) -> - {[], Eq}; -tail_contains_more(<<>>, Eq) -> - {<<>>, Eq}; -tail_contains_more([C|T]=More, Eq) -> - case element(C, ?DECODE_MAP) of - bad -> - tail_contains_more(T, Eq); - ws -> - tail_contains_more(T, Eq); - eq -> - tail_contains_more(T, true); - _ -> - {More, Eq} +decode_list_to_string([C1 | Cs]) -> + case b64d(C1) of + ws -> decode_list_to_string(Cs); + B1 -> decode_list_to_string(Cs, B1) end; -tail_contains_more(<<C:8,T/bits>> =More, Eq) -> - case element(C, ?DECODE_MAP) of - bad -> - tail_contains_more(T, Eq); - ws -> - tail_contains_more(T, Eq); - eq -> - tail_contains_more(T, true); - _ -> - {More, Eq} +decode_list_to_string([]) -> + []. + +decode_list_to_string([C2 | Cs], B1) -> + case b64d(C2) of + ws -> decode_list_to_string(Cs, B1); + B2 -> decode_list_to_string(Cs, B1, B2) + end. + +decode_list_to_string([C3 | Cs], B1, B2) -> + case b64d(C3) of + ws -> decode_list_to_string(Cs, B1, B2); + B3 -> decode_list_to_string(Cs, B1, B2, B3) + end. + +decode_list_to_string([C4 | Cs], B1, B2, B3) -> + case b64d(C4) of + ws -> + decode_list_to_string(Cs, B1, B2, B3); + eq when B3 =:= eq -> + only_ws(Cs, binary_to_list(<<B1:6,(B2 bsr 4):2>>)); + eq -> + only_ws(Cs, binary_to_list(<<B1:6,B2:6,(B3 bsr 2):4>>)); + B4 -> + Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4, + Octet1 = Bits4x6 bsr 16, + Octet2 = (Bits4x6 bsr 8) band 16#ff, + Octet3 = Bits4x6 band 16#ff, + [Octet1, Octet2, Octet3 | decode_list_to_string(Cs)] + end. + +only_ws([], A) -> + A; +only_ws([C | Cs], A) -> + case b64d(C) of + ws -> only_ws(Cs, A) end. - + +%%%======================================================================== +%%% Internal functions +%%%======================================================================== + %% accessors +-compile({inline, [{b64d, 1}]}). +%% One-based decode map. +b64d(X) -> + element(X, + {bad,bad,bad,bad,bad,bad,bad,bad,ws,ws,bad,bad,ws,bad,bad, %1-15 + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, %16-31 + ws,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,62,bad,bad,bad,63, %32-47 + 52,53,54,55,56,57,58,59,60,61,bad,bad,bad,eq,bad,bad, %48-63 + bad,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, + 15,16,17,18,19,20,21,22,23,24,25,bad,bad,bad,bad,bad, + bad,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, + 41,42,43,44,45,46,47,48,49,50,51,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}). + +-compile({inline, [{b64e, 1}]}). b64e(X) -> element(X+1, {$A, $B, $C, $D, $E, $F, $G, $H, $I, $J, $K, $L, $M, $N, @@ -404,9 +463,3 @@ b64e(X) -> $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, $q, $r, $s, $t, $u, $v, $w, $x, $y, $z, $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $+, $/}). - - -b64d(X) -> - b64d_ok(element(X, ?DECODE_MAP)). - -b64d_ok(I) when is_integer(I) -> I. diff --git a/lib/stdlib/test/base64_SUITE.erl b/lib/stdlib/test/base64_SUITE.erl index 48b3f5f959..1fc4c3fc0e 100644 --- a/lib/stdlib/test/base64_SUITE.erl +++ b/lib/stdlib/test/base64_SUITE.erl @@ -97,10 +97,9 @@ base64_otp_5635(Config) when is_list(Config) -> <<"===">> = base64:decode(base64:encode("===")), ok. %%------------------------------------------------------------------------- -%% OTP-6279: Guard needed so that function fails in a correct -%% way for faulty input, i.e. function_clause. +%% OTP-6279: Make sure illegal characters are rejected when decoding. base64_otp_6279(Config) when is_list(Config) -> - {'EXIT',{function_clause, _}} = (catch base64:decode("dGVzda==a")), + {'EXIT',_} = (catch base64:decode("dGVzda==a")), ok. %%------------------------------------------------------------------------- %% Encode and decode big binaries. @@ -115,48 +114,61 @@ big(Config) when is_list(Config) -> %%------------------------------------------------------------------------- %% Make sure illegal characters are rejected when decoding. illegal(Config) when is_list(Config) -> - {'EXIT',{function_clause, _}} = (catch base64:decode("()")), + %% A few samples with different error reasons. Nothing can be + %% assumed about the reason for the crash. + {'EXIT',_} = (catch base64:decode("()")), + {'EXIT',_} = (catch base64:decode(<<19:8,20:8,21:8,22:8>>)), + {'EXIT',_} = (catch base64:decode([19,20,21,22])), + {'EXIT',_} = (catch base64:decode_to_string(<<19:8,20:8,21:8,22:8>>)), + {'EXIT',_} = (catch base64:decode_to_string([19,20,21,22])), ok. %%------------------------------------------------------------------------- %% mime_decode and mime_decode_to_string have different implementations -%% so test both with the same input separately. Both functions have -%% the same implementation for binary/string arguments. +%% so test both with the same input separately. %% %% Test base64:mime_decode/1. mime_decode(Config) when is_list(Config) -> + MimeDecode = fun(In) -> + Out = base64:mime_decode(In), + Out = base64:mime_decode(binary_to_list(In)) + end, %% Test correct padding - <<"one">> = base64:mime_decode(<<"b25l">>), - <<"on">> = base64:mime_decode(<<"b24=">>), - <<"o">> = base64:mime_decode(<<"bw==">>), + <<"one">> = MimeDecode(<<"b25l">>), + <<"on">> = MimeDecode(<<"b24=">>), + <<"o">> = MimeDecode(<<"bw==">>), %% Test 1 extra padding - <<"one">> = base64:mime_decode(<<"b25l= =">>), - <<"on">> = base64:mime_decode(<<"b24== =">>), - <<"o">> = base64:mime_decode(<<"bw=== =">>), + <<"one">> = MimeDecode(<<"b25l= =">>), + <<"on">> = MimeDecode(<<"b24== =">>), + <<"o">> = MimeDecode(<<"bw=== =">>), %% Test 2 extra padding - <<"one">> = base64:mime_decode(<<"b25l===">>), - <<"on">> = base64:mime_decode(<<"b24====">>), - <<"o">> = base64:mime_decode(<<"bw=====">>), + <<"one">> = MimeDecode(<<"b25l===">>), + <<"on">> = MimeDecode(<<"b24====">>), + <<"o">> = MimeDecode(<<"bw=====">>), %% Test misc embedded padding - <<"one">> = base64:mime_decode(<<"b2=5l===">>), - <<"on">> = base64:mime_decode(<<"b=24====">>), - <<"o">> = base64:mime_decode(<<"b=w=====">>), + <<"one">> = MimeDecode(<<"b2=5l===">>), + <<"on">> = MimeDecode(<<"b=24====">>), + <<"o">> = MimeDecode(<<"b=w=====">>), %% Test misc white space and illegals with embedded padding - <<"one">> = base64:mime_decode(<<" b~2=\r\n5()l===">>), - <<"on">> = base64:mime_decode(<<"\tb =2\"¤4=¤= ==">>), - <<"o">> = base64:mime_decode(<<"\nb=w=====">>), + <<"one">> = MimeDecode(<<" b~2=\r\n5()l===">>), + <<"on">> = MimeDecode(<<"\tb =2\"¤4=¤= ==">>), + <<"o">> = MimeDecode(<<"\nb=w=====">>), %% Two pads <<"Aladdin:open sesame">> = - base64:mime_decode("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="), + MimeDecode(<<"QWxhZGRpbjpvc()GVuIHNlc2FtZQ==">>), %% One pad to ignore, followed by more text - <<"Hello World!!">> = base64:mime_decode(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), + <<"Hello World!!">> = MimeDecode(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), %% No pad <<"Aladdin:open sesam">> = - base64:mime_decode("QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft"), + MimeDecode(<<"QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft">>), %% Encoded base 64 strings may be divided by non base 64 chars. %% In this cases whitespaces. <<"0123456789!@#0^&*();:<>,. []{}">> = - base64:mime_decode( + MimeDecode( <<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>), + %% Zeroes + <<"012">> = MimeDecode(<<"\000M\000D\000E\000y=\000">>), + <<"o">> = MimeDecode(<<"bw==\000">>), + <<"o">> = MimeDecode(<<"bw=\000=">>), ok. %%------------------------------------------------------------------------- @@ -165,39 +177,48 @@ mime_decode(Config) when is_list(Config) -> %% Test base64:mime_decode_to_string/1. mime_decode_to_string(Config) when is_list(Config) -> + MimeDecodeToString = + fun(In) -> + Out = base64:mime_decode_to_string(In), + Out = base64:mime_decode_to_string(binary_to_list(In)) + end, %% Test correct padding - "one" = base64:mime_decode_to_string(<<"b25l">>), - "on" = base64:mime_decode_to_string(<<"b24=">>), - "o" = base64:mime_decode_to_string(<<"bw==">>), + "one" = MimeDecodeToString(<<"b25l">>), + "on" = MimeDecodeToString(<<"b24=">>), + "o" = MimeDecodeToString(<<"bw==">>), %% Test 1 extra padding - "one" = base64:mime_decode_to_string(<<"b25l= =">>), - "on" = base64:mime_decode_to_string(<<"b24== =">>), - "o" = base64:mime_decode_to_string(<<"bw=== =">>), + "one" = MimeDecodeToString(<<"b25l= =">>), + "on" = MimeDecodeToString(<<"b24== =">>), + "o" = MimeDecodeToString(<<"bw=== =">>), %% Test 2 extra padding - "one" = base64:mime_decode_to_string(<<"b25l===">>), - "on" = base64:mime_decode_to_string(<<"b24====">>), - "o" = base64:mime_decode_to_string(<<"bw=====">>), + "one" = MimeDecodeToString(<<"b25l===">>), + "on" = MimeDecodeToString(<<"b24====">>), + "o" = MimeDecodeToString(<<"bw=====">>), %% Test misc embedded padding - "one" = base64:mime_decode_to_string(<<"b2=5l===">>), - "on" = base64:mime_decode_to_string(<<"b=24====">>), - "o" = base64:mime_decode_to_string(<<"b=w=====">>), + "one" = MimeDecodeToString(<<"b2=5l===">>), + "on" = MimeDecodeToString(<<"b=24====">>), + "o" = MimeDecodeToString(<<"b=w=====">>), %% Test misc white space and illegals with embedded padding - "one" = base64:mime_decode_to_string(<<" b~2=\r\n5()l===">>), - "on" = base64:mime_decode_to_string(<<"\tb =2\"¤4=¤= ==">>), - "o" = base64:mime_decode_to_string(<<"\nb=w=====">>), + "one" = MimeDecodeToString(<<" b~2=\r\n5()l===">>), + "on" = MimeDecodeToString(<<"\tb =2\"¤4=¤= ==">>), + "o" = MimeDecodeToString(<<"\nb=w=====">>), %% Two pads "Aladdin:open sesame" = - base64:mime_decode_to_string("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="), + MimeDecodeToString(<<"QWxhZGRpbjpvc()GVuIHNlc2FtZQ==">>), %% One pad to ignore, followed by more text - "Hello World!!" = base64:mime_decode_to_string(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), + "Hello World!!" = MimeDecodeToString(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), %% No pad "Aladdin:open sesam" = - base64:mime_decode_to_string("QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft"), + MimeDecodeToString(<<"QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft">>), %% Encoded base 64 strings may be divided by non base 64 chars. %% In this cases whitespaces. "0123456789!@#0^&*();:<>,. []{}" = - base64:mime_decode_to_string( + MimeDecodeToString( <<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>), + %% Zeroes + "012" = MimeDecodeToString(<<"\000M\000D\000E\000y=\000">>), + "o" = MimeDecodeToString(<<"bw==\000">>), + "o" = MimeDecodeToString(<<"bw=\000=">>), ok. %%------------------------------------------------------------------------- diff --git a/lib/stdlib/test/stdlib_bench_SUITE.erl b/lib/stdlib/test/stdlib_bench_SUITE.erl index 8670e7029c..2a9981bb9e 100644 --- a/lib/stdlib/test/stdlib_bench_SUITE.erl +++ b/lib/stdlib/test/stdlib_bench_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2012-2016. All Rights Reserved. +%% Copyright Ericsson AB 2012-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -28,13 +28,20 @@ suite() -> [{ct_hooks,[{ts_install_cth,[{nodenames,2}]}]}]. all() -> - [{group,unicode}]. + [{group,unicode}, {group,base64}]. groups() -> [{unicode,[{repeat,5}], [norm_nfc_list, norm_nfc_deep_l, norm_nfc_binary, string_lexemes_list, string_lexemes_binary - ]}]. + ]}, + {base64,[{repeat,5}], + [decode_binary, decode_binary_to_string, + decode_list, decode_list_to_string, + encode_binary, encode_binary_to_string, + encode_list, encode_list_to_string, + mime_binary_decode, mime_binary_decode_to_string, + mime_list_decode, mime_list_decode_to_string]}]. init_per_group(_GroupName, Config) -> Config. @@ -105,3 +112,97 @@ norm_data(Config) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +decode_binary(_Config) -> + test(decode, encoded_binary()). + +decode_binary_to_string(_Config) -> + test(decode_to_string, encoded_binary()). + +decode_list(_Config) -> + test(decode, encoded_list()). + +decode_list_to_string(_Config) -> + test(decode_to_string, encoded_list()). + +encode_binary(_Config) -> + test(encode, binary()). + +encode_binary_to_string(_Config) -> + test(encode_to_string, binary()). + +encode_list(_Config) -> + test(encode, list()). + +encode_list_to_string(_Config) -> + test(encode_to_string, list()). + +mime_binary_decode(_Config) -> + test(mime_decode, encoded_binary()). + +mime_binary_decode_to_string(_Config) -> + test(mime_decode_to_string, encoded_binary()). + +mime_list_decode(_Config) -> + test(mime_decode, encoded_list()). + +mime_list_decode_to_string(_Config) -> + test(mime_decode_to_string, encoded_list()). + +-define(SIZE, 10000). +-define(N, 1000). + +encoded_binary() -> + list_to_binary(encoded_list()). + +encoded_list() -> + L = random_byte_list(round(?SIZE*0.75)), + base64:encode_to_string(L). + +binary() -> + list_to_binary(list()). + +list() -> + random_byte_list(?SIZE). + +test(Func, Data) -> + F = fun() -> loop(?N, Func, Data) end, + {Time, ok} = timer:tc(fun() -> lspawn(F) end), + report_base64(Time). + +loop(0, _F, _D) -> garbage_collect(), ok; +loop(N, F, D) -> + _ = base64:F(D), + loop(N - 1, F, D). + +lspawn(Fun) -> + {Pid, Ref} = spawn_monitor(fun() -> exit(Fun()) end), + receive + {'DOWN', Ref, process, Pid, Rep} -> Rep + end. + +report_base64(Time) -> + Tps = round((?N*1000000)/Time), + ct_event:notify(#event{name = benchmark_data, + data = [{suite, "stdlib_base64"}, + {value, Tps}]}), + Tps. + +%% Copied from base64_SUITE.erl. + +random_byte_list(N) -> + random_byte_list(N, []). + +random_byte_list(0, Acc) -> + Acc; +random_byte_list(N, Acc) -> + random_byte_list(N-1, [rand:uniform(255)|Acc]). + +make_big_binary(N) -> + list_to_binary(mbb(N, [])). + +mbb(N, Acc) when N > 256 -> + B = list_to_binary(lists:seq(0, 255)), + mbb(N - 256, [B | Acc]); +mbb(N, Acc) -> + B = list_to_binary(lists:seq(0, N-1)), + lists:reverse(Acc, B). |