From 3d28da78f3e8bab7a13f4f928cdd7747ab943197 Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Mon, 30 Oct 2017 11:55:42 +0100 Subject: stdlib: Optimize base64 functions A few test cases with zeroes are added. They were not handled correctly before. The access of DECODE_MAP is moved into the inlined function b64d, for symmetry. The function b64e is also inlined. The speed-up is small, but measurable. Note: encode(List), decode(List) and mime_decode(List) no longer call list_to_binary. This can break code that calls the functions with I/O-lists as input. --- lib/stdlib/src/base64.erl | 699 ++++++++++++++++++++++++--------------- lib/stdlib/test/base64_SUITE.erl | 96 +++--- 2 files changed, 489 insertions(+), 306 deletions(-) diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl index c8cf6fdffe..6311f3c86b 100644 --- a/lib/stdlib/src/base64.erl +++ b/lib/stdlib/src/base64.erl @@ -24,89 +24,95 @@ -export([encode/1, decode/1, mime_decode/1, encode_to_string/1, decode_to_string/1, mime_decode_to_string/1]). -%%------------------------------------------------------------------------- %% The following type is a subtype of string() for return values %% of (some) functions of this module. -%%------------------------------------------------------------------------- - -type ascii_string() :: [1..255]. -type ascii_binary() :: binary(). -%%------------------------------------------------------------------------- -%% encode_to_string(ASCII) -> Base64String -%% ASCII - string() | binary() -%% Base64String - string() -%% -%% Description: Encodes a plain ASCII string (or binary) into base64. -%%------------------------------------------------------------------------- - -spec encode_to_string(Data) -> Base64String when Data :: ascii_string() | ascii_binary(), Base64String :: ascii_string(). encode_to_string(Bin) when is_binary(Bin) -> - encode_to_string(binary_to_list(Bin)); + encode_binary_to_string(Bin); encode_to_string(List) when is_list(List) -> - encode_l(List). - -%%------------------------------------------------------------------------- -%% encode(ASCII) -> Base64 -%% ASCII - string() | binary() -%% Base64 - binary() -%% -%% Description: Encodes a plain ASCII string (or binary) into base64. -%%------------------------------------------------------------------------- + encode_list_to_string(List). -spec encode(Data) -> Base64 when Data :: ascii_string() | ascii_binary(), Base64 :: ascii_binary(). encode(Bin) when is_binary(Bin) -> - encode_binary(Bin); + encode_binary(Bin, <<>>); encode(List) when is_list(List) -> - list_to_binary(encode_l(List)). + encode_list(List, <<>>). --spec encode_l(ascii_string()) -> ascii_string(). +encode_binary_to_string(<<>>) -> + []; +encode_binary_to_string(<>) -> + [b64e(B1 bsr 2), + b64e((B1 band 3) bsl 4), $=, $=]; +encode_binary_to_string(<>) -> + [b64e(B1 bsr 2), + b64e(((B1 band 3) bsl 4) bor (B2 bsr 4)), + b64e((B2 band 15) bsl 2), $=]; +encode_binary_to_string(<>) -> + BB = (B1 bsl 16) bor (B2 bsl 8) bor B3, + [b64e(BB bsr 18), + b64e((BB bsr 12) band 63), + b64e((BB bsr 6) band 63), + b64e(BB band 63) | encode_binary_to_string(Ls)]. -encode_l([]) -> +encode_list_to_string([]) -> []; -encode_l([A]) -> - [b64e(A bsr 2), - b64e((A band 3) bsl 4), $=, $=]; -encode_l([A,B]) -> - [b64e(A bsr 2), - b64e(((A band 3) bsl 4) bor (B bsr 4)), - b64e((B band 15) bsl 2), $=]; -encode_l([A,B,C|Ls]) -> - BB = (A bsl 16) bor (B bsl 8) bor C, +encode_list_to_string([B1]) -> + [b64e(B1 bsr 2), + b64e((B1 band 3) bsl 4), $=, $=]; +encode_list_to_string([B1,B2]) -> + [b64e(B1 bsr 2), + b64e(((B1 band 3) bsl 4) bor (B2 bsr 4)), + b64e((B2 band 15) bsl 2), $=]; +encode_list_to_string([B1,B2,B3|Ls]) -> + BB = (B1 bsl 16) bor (B2 bsl 8) bor B3, [b64e(BB bsr 18), b64e((BB bsr 12) band 63), b64e((BB bsr 6) band 63), - b64e(BB band 63) | encode_l(Ls)]. - -encode_binary(Bin) -> - Split = 3*(byte_size(Bin) div 3), - <> = Bin, - Main = << <<(b64e(C)):8>> || <> <= Main0 >>, - case Rest of - <> -> - <
>; - <> -> - <
>; - <<>> -> - Main - end. + b64e(BB band 63) | encode_list_to_string(Ls)]. -%%------------------------------------------------------------------------- -%% mime_decode(Base64) -> ASCII -%% decode(Base64) -> ASCII -%% Base64 - string() | binary() -%% ASCII - binary() -%% -%% Description: Decodes an base64 encoded string to plain ASCII. -%% mime_decode strips away all characters not Base64 before converting, -%% whereas decode crashes if an illegal character is found -%%------------------------------------------------------------------------- +encode_binary(<<>>, A) -> + A; +encode_binary(<>, A) -> + <>; +encode_binary(<>, A) -> + <>; +encode_binary(<>, A) -> + BB = (B1 bsl 16) bor (B2 bsl 8) bor B3, + encode_binary(Ls, + <>). + +encode_list([], A) -> + A; +encode_list([B1], A) -> + <>; +encode_list([B1,B2], A) -> + <>; +encode_list([B1,B2,B3|Ls], A) -> + BB = (B1 bsl 16) bor (B2 bsl 8) bor B3, + encode_list(Ls, + <>). + +%% mime_decode strips away all characters not Base64 before +%% converting, whereas decode crashes if an illegal character is found -spec decode(Base64) -> Data when Base64 :: ascii_string() | ascii_binary(), @@ -122,158 +128,280 @@ decode(List) when is_list(List) -> Data :: ascii_binary(). mime_decode(Bin) when is_binary(Bin) -> - mime_decode_binary(<<>>, Bin); + mime_decode_binary(Bin, <<>>); mime_decode(List) when is_list(List) -> - mime_decode(list_to_binary(List)). + mime_decode_list(List, <<>>). --spec decode_l(ascii_string()) -> ascii_string(). - -decode_l(List) -> - L = strip_spaces(List, []), - decode(L, []). - --spec mime_decode_l(ascii_string()) -> ascii_string(). - -mime_decode_l(List) -> - L = strip_illegal(List, [], 0), - decode(L, []). - -%%------------------------------------------------------------------------- -%% mime_decode_to_string(Base64) -> ASCII -%% decode_to_string(Base64) -> ASCII -%% Base64 - string() | binary() -%% ASCII - binary() -%% -%% Description: Decodes an base64 encoded string to plain ASCII. -%% mime_decode strips away all characters not Base64 before converting, -%% whereas decode crashes if an illegal character is found -%%------------------------------------------------------------------------- +%% mime_decode_to_string strips away all characters not Base64 before +%% converting, whereas decode_to_string crashes if an illegal +%% character is found -spec decode_to_string(Base64) -> DataString when Base64 :: ascii_string() | ascii_binary(), DataString :: ascii_string(). decode_to_string(Bin) when is_binary(Bin) -> - decode_to_string(binary_to_list(Bin)); + decode_binary_to_string(Bin); decode_to_string(List) when is_list(List) -> - decode_l(List). + decode_list_to_string(List). -spec mime_decode_to_string(Base64) -> DataString when Base64 :: ascii_string() | ascii_binary(), DataString :: ascii_string(). mime_decode_to_string(Bin) when is_binary(Bin) -> - mime_decode_to_string(binary_to_list(Bin)); + mime_decode_binary_to_string(Bin); mime_decode_to_string(List) when is_list(List) -> - mime_decode_l(List). - -%% One-based decode map. --define(DECODE_MAP, - {bad,bad,bad,bad,bad,bad,bad,bad,ws,ws,bad,bad,ws,bad,bad, %1-15 - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, %16-31 - ws,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,62,bad,bad,bad,63, %32-47 - 52,53,54,55,56,57,58,59,60,61,bad,bad,bad,eq,bad,bad, %48-63 - bad,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, - 15,16,17,18,19,20,21,22,23,24,25,bad,bad,bad,bad,bad, - bad,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, - 41,42,43,44,45,46,47,48,49,50,51,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, - bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}). + mime_decode_list_to_string(List). -decode_binary(<>, A) -> - case element(C1, ?DECODE_MAP) of - ws -> decode_binary(Cs, A); - B1 -> decode_binary(Cs, A, B1) +%% Skipping pad character if not at end of string. Also liberal about +%% excess padding and skipping of other illegal (non-base64 alphabet) +%% characters. See section 3.3 of RFC4648 +mime_decode_list([0 | Cs], A) -> + mime_decode_list(Cs, A); +mime_decode_list([C1 | Cs], A) -> + case b64d(C1) of + B1 when is_integer(B1) -> mime_decode_list(Cs, A, B1); + _ -> mime_decode_list(Cs, A) % eq is padding end; -decode_binary(<<>>, A) -> +mime_decode_list([], A) -> A. -decode_binary(<>, A, B1) -> - case element(C2, ?DECODE_MAP) of - ws -> decode_binary(Cs, A, B1); - B2 -> decode_binary(Cs, A, B1, B2) +mime_decode_list([0 | Cs], A, B1) -> + mime_decode_list(Cs, A, B1); +mime_decode_list([C2 | Cs], A, B1) -> + case b64d(C2) of + B2 when is_integer(B2) -> + mime_decode_list(Cs, A, B1, B2); + _ -> mime_decode_list(Cs, A, B1) % eq is padding end. -decode_binary(<>, A, B1, B2) -> - case element(C3, ?DECODE_MAP) of - ws -> decode_binary(Cs, A, B1, B2); - B3 -> decode_binary(Cs, A, B1, B2, B3) +mime_decode_list([0 | Cs], A, B1, B2) -> + mime_decode_list(Cs, A, B1, B2); +mime_decode_list([C3 | Cs], A, B1, B2) -> + case b64d(C3) of + B3 when is_integer(B3) -> + mime_decode_list(Cs, A, B1, B2, B3); + eq=B3 -> + mime_decode_list_after_eq(Cs, A, B1, B2, B3); + _ -> mime_decode_list(Cs, A, B1, B2) end. -decode_binary(<>, A, B1, B2, B3) -> - case element(C4, ?DECODE_MAP) of - ws -> decode_binary(Cs, A, B1, B2, B3); - eq when B3 =:= eq -> only_ws_binary(Cs, <>); - eq -> only_ws_binary(Cs, <>); - B4 -> decode_binary(Cs, <>) +mime_decode_list([0 | Cs], A, B1, B2, B3) -> + mime_decode_list(Cs, A, B1, B2, B3); +mime_decode_list([C4 | Cs], A, B1, B2, B3) -> + case b64d(C4) of + B4 when is_integer(B4) -> + mime_decode_list(Cs, <>); + eq -> + mime_decode_list_after_eq(Cs, A, B1, B2, B3); + _ -> mime_decode_list(Cs, A, B1, B2, B3) end. -only_ws_binary(<<>>, A) -> - A; -only_ws_binary(<>, A) -> - case element(C, ?DECODE_MAP) of - ws -> only_ws_binary(Cs, A); - _ -> erlang:error(function_clause) +mime_decode_list_after_eq([0 | Cs], A, B1, B2, B3) -> + mime_decode_list_after_eq(Cs, A, B1, B2, B3); +mime_decode_list_after_eq([C | Cs], A, B1, B2, B3) -> + case b64d(C) of + B when is_integer(B) -> + %% More valid data, skip the eq as invalid + case B3 of + eq -> mime_decode_list(Cs, A, B1, B2, B); + _ -> mime_decode_list(Cs, <>) + end; + _ -> mime_decode_list_after_eq(Cs, A, B1, B2, B3) + end; +mime_decode_list_after_eq([], A, B1, B2, eq) -> + <>; +mime_decode_list_after_eq([], A, B1, B2, B3) -> + <>. + +mime_decode_binary(<<0:8, Cs/bits>>, A) -> + mime_decode_binary(Cs, A); +mime_decode_binary(<>, A) -> + case b64d(C1) of + B1 when is_integer(B1) -> mime_decode_binary(Cs, A, B1); + _ -> mime_decode_binary(Cs, A) % eq is padding + end; +mime_decode_binary(<<>>, A) -> + A. + +mime_decode_binary(<<0:8, Cs/bits>>, A, B1) -> + mime_decode_binary(Cs, A, B1); +mime_decode_binary(<>, A, B1) -> + case b64d(C2) of + B2 when is_integer(B2) -> + mime_decode_binary(Cs, A, B1, B2); + _ -> mime_decode_binary(Cs, A, B1) % eq is padding end. -%% Skipping pad character if not at end of string. Also liberal about -%% excess padding and skipping of other illegal (non-base64 alphabet) -%% characters. See section 3.3 of RFC4648 -mime_decode_binary(Result, <<0:8,T/bits>>) -> - mime_decode_binary(Result, T); -mime_decode_binary(Result0, <>) -> - case element(C, ?DECODE_MAP) of - Bits when is_integer(Bits) -> - mime_decode_binary(<>, T); +mime_decode_binary(<<0:8, Cs/bits>>, A, B1, B2) -> + mime_decode_binary(Cs, A, B1, B2); +mime_decode_binary(<>, A, B1, B2) -> + case b64d(C3) of + B3 when is_integer(B3) -> + mime_decode_binary(Cs, A, B1, B2, B3); + eq=B3 -> + mime_decode_binary_after_eq(Cs, A, B1, B2, B3); + _ -> mime_decode_binary(Cs, A, B1, B2) + end. + +mime_decode_binary(<<0:8, Cs/bits>>, A, B1, B2, B3) -> + mime_decode_binary(Cs, A, B1, B2, B3); +mime_decode_binary(<>, A, B1, B2, B3) -> + case b64d(C4) of + B4 when is_integer(B4) -> + mime_decode_binary(Cs, <>); eq -> - mime_decode_binary_after_eq(Result0, T, false); - _ -> - mime_decode_binary(Result0, T) + mime_decode_binary_after_eq(Cs, A, B1, B2, B3); + _ -> mime_decode_binary(Cs, A, B1, B2, B3) + end. + +mime_decode_binary_after_eq(<<0:8, Cs/bits>>, A, B1, B2, B3) -> + mime_decode_binary_after_eq(Cs, A, B1, B2, B3); +mime_decode_binary_after_eq(<>, A, B1, B2, B3) -> + case b64d(C) of + B when is_integer(B) -> + %% More valid data, skip the eq as invalid + case B3 of + eq -> mime_decode_binary(Cs, A, B1, B2, B); + _ -> mime_decode_binary(Cs, <>) + end; + _ -> mime_decode_binary_after_eq(Cs, A, B1, B2, B3) end; -mime_decode_binary(Result, _) -> - true = is_binary(Result), - Result. - -mime_decode_binary_after_eq(Result, <<0:8,T/bits>>, Eq) -> - mime_decode_binary_after_eq(Result, T, Eq); -mime_decode_binary_after_eq(Result0, <>, Eq) -> - case element(C, ?DECODE_MAP) of - bad -> - mime_decode_binary_after_eq(Result0, T, Eq); - ws -> - mime_decode_binary_after_eq(Result0, T, Eq); +mime_decode_binary_after_eq(<<>>, A, B1, B2, eq) -> + <>; +mime_decode_binary_after_eq(<<>>, A, B1, B2, B3) -> + <>. + +mime_decode_list_to_string([0 | Cs]) -> + mime_decode_list_to_string(Cs); +mime_decode_list_to_string([C1 | Cs]) -> + case b64d(C1) of + B1 when is_integer(B1) -> mime_decode_list_to_string(Cs, B1); + _ -> mime_decode_list_to_string(Cs) % eq is padding + end; +mime_decode_list_to_string([]) -> + []. + +mime_decode_list_to_string([0 | Cs], B1) -> + mime_decode_list_to_string(Cs, B1); +mime_decode_list_to_string([C2 | Cs], B1) -> + case b64d(C2) of + B2 when is_integer(B2) -> + mime_decode_list_to_string(Cs, B1, B2); + _ -> mime_decode_list_to_string(Cs, B1) % eq is padding + end. + +mime_decode_list_to_string([0 | Cs], B1, B2) -> + mime_decode_list_to_string(Cs, B1, B2); +mime_decode_list_to_string([C3 | Cs], B1, B2) -> + case b64d(C3) of + B3 when is_integer(B3) -> + mime_decode_list_to_string(Cs, B1, B2, B3); + eq=B3 -> mime_decode_list_to_string_after_eq(Cs, B1, B2, B3); + _ -> mime_decode_list_to_string(Cs, B1, B2) + end. + +mime_decode_list_to_string([0 | Cs], B1, B2, B3) -> + mime_decode_list_to_string(Cs, B1, B2, B3); +mime_decode_list_to_string([C4 | Cs], B1, B2, B3) -> + case b64d(C4) of + B4 when is_integer(B4) -> + Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4, + Octet1 = Bits4x6 bsr 16, + Octet2 = (Bits4x6 bsr 8) band 16#ff, + Octet3 = Bits4x6 band 16#ff, + [Octet1, Octet2, Octet3 | mime_decode_list_to_string(Cs)]; eq -> - mime_decode_binary_after_eq(Result0, T, true); - Bits when is_integer(Bits) -> + mime_decode_list_to_string_after_eq(Cs, B1, B2, B3); + _ -> mime_decode_list_to_string(Cs, B1, B2, B3) + end. + +mime_decode_list_to_string_after_eq([0 | Cs], B1, B2, B3) -> + mime_decode_list_to_string_after_eq(Cs, B1, B2, B3); +mime_decode_list_to_string_after_eq([C | Cs], B1, B2, B3) -> + case b64d(C) of + B when is_integer(B) -> %% More valid data, skip the eq as invalid - mime_decode_binary(<>, T) + case B3 of + eq -> mime_decode_list_to_string(Cs, B1, B2, B); + _ -> + Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B, + Octet1 = Bits4x6 bsr 16, + Octet2 = (Bits4x6 bsr 8) band 16#ff, + Octet3 = Bits4x6 band 16#ff, + [Octet1, Octet2, Octet3 | mime_decode_list_to_string(Cs)] + end; + _ -> mime_decode_list_to_string_after_eq(Cs, B1, B2, B3) + end; +mime_decode_list_to_string_after_eq([], B1, B2, eq) -> + binary_to_list(<>); +mime_decode_list_to_string_after_eq([], B1, B2, B3) -> + binary_to_list(<>). + +mime_decode_binary_to_string(<<0:8, Cs/bits>>) -> + mime_decode_binary_to_string(Cs); +mime_decode_binary_to_string(<>) -> + case b64d(C1) of + B1 when is_integer(B1) -> mime_decode_binary_to_string(Cs, B1); + _ -> mime_decode_binary_to_string(Cs) % eq is padding end; -mime_decode_binary_after_eq(Result0, <<>>, Eq) -> - %% No more valid data. - case bit_size(Result0) rem 8 of - 0 -> - %% '====' is not uncommon. - Result0; - 4 when Eq -> - %% enforce at least one more '=' only ignoring illegals and spacing - Split = byte_size(Result0) - 1, - <> = Result0, - Result; - 2 -> - %% remove 2 bits - Split = byte_size(Result0) - 1, - <> = Result0, - Result +mime_decode_binary_to_string(<<>>) -> + []. + +mime_decode_binary_to_string(<<0:8, Cs/bits>>, B1) -> + mime_decode_binary_to_string(Cs, B1); +mime_decode_binary_to_string(<>, B1) -> + case b64d(C2) of + B2 when is_integer(B2) -> + mime_decode_binary_to_string(Cs, B1, B2); + _ -> mime_decode_binary_to_string(Cs, B1) % eq is padding + end. + +mime_decode_binary_to_string(<<0:8, Cs/bits>>, B1, B2) -> + mime_decode_binary_to_string(Cs, B1, B2); +mime_decode_binary_to_string(<>, B1, B2) -> + case b64d(C3) of + B3 when is_integer(B3) -> + mime_decode_binary_to_string(Cs, B1, B2, B3); + eq=B3 -> mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3); + _ -> mime_decode_binary_to_string(Cs, B1, B2) + end. + +mime_decode_binary_to_string(<<0:8, Cs/bits>>, B1, B2, B3) -> + mime_decode_binary_to_string(Cs, B1, B2, B3); +mime_decode_binary_to_string(<>, B1, B2, B3) -> + case b64d(C4) of + B4 when is_integer(B4) -> + Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4, + Octet1 = Bits4x6 bsr 16, + Octet2 = (Bits4x6 bsr 8) band 16#ff, + Octet3 = Bits4x6 band 16#ff, + [Octet1, Octet2, Octet3 | mime_decode_binary_to_string(Cs)]; + eq -> + mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3); + _ -> mime_decode_binary_to_string(Cs, B1, B2, B3) end. +mime_decode_binary_to_string_after_eq(<<0:8, Cs/bits>>, B1, B2, B3) -> + mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3); +mime_decode_binary_to_string_after_eq(<>=Cs0, B1, B2, B3) -> + case b64d(C) of + B when is_integer(B) -> + %% More valid data, skip the eq as invalid + case B3 of + eq -> mime_decode_binary_to_string(Cs, B1, B2, B); + _ -> mime_decode_binary_to_string(Cs0, B1, B2, B3) + end; + _ -> mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3) + end; +mime_decode_binary_to_string_after_eq(<<>>, B1, B2, eq) -> + binary_to_list(<>); +mime_decode_binary_to_string_after_eq(<<>>, B1, B2, B3) -> + binary_to_list(<>). + decode_list([C1 | Cs], A) -> - case element(C1, ?DECODE_MAP) of + case b64d(C1) of ws -> decode_list(Cs, A); B1 -> decode_list(Cs, A, B1) end; @@ -281,122 +409,167 @@ decode_list([], A) -> A. decode_list([C2 | Cs], A, B1) -> - case element(C2, ?DECODE_MAP) of + case b64d(C2) of ws -> decode_list(Cs, A, B1); B2 -> decode_list(Cs, A, B1, B2) end. decode_list([C3 | Cs], A, B1, B2) -> - case element(C3, ?DECODE_MAP) of + case b64d(C3) of ws -> decode_list(Cs, A, B1, B2); B3 -> decode_list(Cs, A, B1, B2, B3) end. decode_list([C4 | Cs], A, B1, B2, B3) -> - case element(C4, ?DECODE_MAP) of + case b64d(C4) of ws -> decode_list(Cs, A, B1, B2, B3); eq when B3 =:= eq -> only_ws(Cs, <>); eq -> only_ws(Cs, <>); B4 -> decode_list(Cs, <>) end. +decode_binary(<>, A) -> + case b64d(C1) of + ws -> decode_binary(Cs, A); + B1 -> decode_binary(Cs, A, B1) + end; +decode_binary(<<>>, A) -> + A. + +decode_binary(<>, A, B1) -> + case b64d(C2) of + ws -> decode_binary(Cs, A, B1); + B2 -> decode_binary(Cs, A, B1, B2) + end. + +decode_binary(<>, A, B1, B2) -> + case b64d(C3) of + ws -> decode_binary(Cs, A, B1, B2); + B3 -> decode_binary(Cs, A, B1, B2, B3) + end. + +decode_binary(<>, A, B1, B2, B3) -> + case b64d(C4) of + ws -> decode_binary(Cs, A, B1, B2, B3); + eq when B3 =:= eq -> only_ws_binary(Cs, <>); + eq -> only_ws_binary(Cs, <>); + B4 -> decode_binary(Cs, <>) + end. + +decode_list_to_string([C1 | Cs]) -> + case b64d(C1) of + ws -> decode_list_to_string(Cs); + B1 -> decode_list_to_string(Cs, B1) + end; +decode_list_to_string([]) -> + []. + +decode_list_to_string([C2 | Cs], B1) -> + case b64d(C2) of + ws -> decode_list_to_string(Cs, B1); + B2 -> decode_list_to_string(Cs, B1, B2) + end. + +decode_list_to_string([C3 | Cs], B1, B2) -> + case b64d(C3) of + ws -> decode_list_to_string(Cs, B1, B2); + B3 -> decode_list_to_string(Cs, B1, B2, B3) + end. + +decode_list_to_string([C4 | Cs], B1, B2, B3) -> + case b64d(C4) of + ws -> + decode_list_to_string(Cs, B1, B2, B3); + eq when B3 =:= eq -> + only_ws(Cs, binary_to_list(<>)); + eq -> + only_ws(Cs, binary_to_list(<>)); + B4 -> + Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4, + Octet1 = Bits4x6 bsr 16, + Octet2 = (Bits4x6 bsr 8) band 16#ff, + Octet3 = Bits4x6 band 16#ff, + [Octet1, Octet2, Octet3 | decode_list_to_string(Cs)] + end. + only_ws([], A) -> A; only_ws([C | Cs], A) -> - case element(C, ?DECODE_MAP) of + case b64d(C) of ws -> only_ws(Cs, A); _ -> erlang:error(function_clause) end. -decode([], A) -> A; -decode([$=,$=,C2,C1|Cs], A) -> - Bits2x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12), - Octet1 = Bits2x6 bsr 16, - decode(Cs, [Octet1|A]); -decode([$=,C3,C2,C1|Cs], A) -> - Bits3x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12) - bor (b64d(C3) bsl 6), - Octet1 = Bits3x6 bsr 16, - Octet2 = (Bits3x6 bsr 8) band 16#ff, - decode(Cs, [Octet1,Octet2|A]); -decode([C4,C3,C2,C1| Cs], A) -> - Bits4x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12) - bor (b64d(C3) bsl 6) bor b64d(C4), - Octet1 = Bits4x6 bsr 16, - Octet2 = (Bits4x6 bsr 8) band 16#ff, - Octet3 = Bits4x6 band 16#ff, - decode(Cs, [Octet1,Octet2,Octet3|A]). +decode_binary_to_string(<>) -> + case b64d(C1) of + ws -> decode_binary_to_string(Cs); + B1 -> decode_binary_to_string(Cs, B1) + end; +decode_binary_to_string(<<>>) -> + []. -%%%======================================================================== -%%% Internal functions -%%%======================================================================== +decode_binary_to_string(<>, B1) -> + case b64d(C2) of + ws -> decode_binary_to_string(Cs, B1); + B2 -> decode_binary_to_string(Cs, B1, B2) + end. -strip_spaces([], A) -> A; -strip_spaces([$\s|Cs], A) -> strip_spaces(Cs, A); -strip_spaces([$\t|Cs], A) -> strip_spaces(Cs, A); -strip_spaces([$\r|Cs], A) -> strip_spaces(Cs, A); -strip_spaces([$\n|Cs], A) -> strip_spaces(Cs, A); -strip_spaces([C|Cs], A) -> strip_spaces(Cs, [C | A]). +decode_binary_to_string(<>, B1, B2) -> + case b64d(C3) of + ws -> decode_binary_to_string(Cs, B1, B2); + B3 -> decode_binary_to_string(Cs, B1, B2, B3) + end. -%% Skipping pad character if not at end of string. Also liberal about -%% excess padding and skipping of other illegal (non-base64 alphabet) -%% characters. See section 3.3 of RFC4648 -strip_illegal([], A, _Cnt) -> - A; -strip_illegal([0|Cs], A, Cnt) -> - strip_illegal(Cs, A, Cnt); -strip_illegal([C|Cs], A, Cnt) -> - case element(C, ?DECODE_MAP) of - bad -> - strip_illegal(Cs, A, Cnt); - ws -> - strip_illegal(Cs, A, Cnt); - eq -> - case {tail_contains_more(Cs, false), Cnt rem 4} of - {{[], _}, 0} -> - A; %% Ignore extra = - {{[], true}, 2} -> - [$=|[$=|A]]; %% 'XX==' - {{[], _}, 3} -> - [$=|A]; %% 'XXX=' - {{[H|T], _}, _} -> - %% more data, skip equals - strip_illegal(T, [H|A], Cnt+1) - end; - _ -> - strip_illegal(Cs, [C|A], Cnt+1) +decode_binary_to_string(<>, B1, B2, B3) -> + case b64d(C4) of + ws -> decode_binary_to_string(Cs, B1, B2, B3); + eq when B3 =:= eq -> + only_ws_binary(Cs, binary_to_list(<>)); + eq -> + only_ws_binary(Cs, binary_to_list(<>)); + B4 -> + Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4, + Octet1 = Bits4x6 bsr 16, + Octet2 = (Bits4x6 bsr 8) band 16#ff, + Octet3 = Bits4x6 band 16#ff, + [Octet1, Octet2, Octet3 | decode_binary_to_string(Cs)] end. -%% Search the tail for more valid data and remember if we saw -%% another equals along the way. -tail_contains_more([], Eq) -> - {[], Eq}; -tail_contains_more(<<>>, Eq) -> - {<<>>, Eq}; -tail_contains_more([C|T]=More, Eq) -> - case element(C, ?DECODE_MAP) of - bad -> - tail_contains_more(T, Eq); - ws -> - tail_contains_more(T, Eq); - eq -> - tail_contains_more(T, true); - _ -> - {More, Eq} - end; -tail_contains_more(<> =More, Eq) -> - case element(C, ?DECODE_MAP) of - bad -> - tail_contains_more(T, Eq); - ws -> - tail_contains_more(T, Eq); - eq -> - tail_contains_more(T, true); - _ -> - {More, Eq} +only_ws_binary(<<>>, A) -> + A; +only_ws_binary(<>, A) -> + case b64d(C) of + ws -> only_ws_binary(Cs, A); + _ -> erlang:error(function_clause) end. - + +%%%======================================================================== +%%% Internal functions +%%%======================================================================== + %% accessors +-compile({inline, [{b64d, 1}]}). +%% One-based decode map. +b64d(X) -> + element(X, + {bad,bad,bad,bad,bad,bad,bad,bad,ws,ws,bad,bad,ws,bad,bad, %1-15 + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, %16-31 + ws,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,62,bad,bad,bad,63, %32-47 + 52,53,54,55,56,57,58,59,60,61,bad,bad,bad,eq,bad,bad, %48-63 + bad,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, + 15,16,17,18,19,20,21,22,23,24,25,bad,bad,bad,bad,bad, + bad,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, + 41,42,43,44,45,46,47,48,49,50,51,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, + bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}). + +-compile({inline, [{b64e, 1}]}). b64e(X) -> element(X+1, {$A, $B, $C, $D, $E, $F, $G, $H, $I, $J, $K, $L, $M, $N, @@ -404,9 +577,3 @@ b64e(X) -> $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, $q, $r, $s, $t, $u, $v, $w, $x, $y, $z, $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $+, $/}). - - -b64d(X) -> - b64d_ok(element(X, ?DECODE_MAP)). - -b64d_ok(I) when is_integer(I) -> I. diff --git a/lib/stdlib/test/base64_SUITE.erl b/lib/stdlib/test/base64_SUITE.erl index 48b3f5f959..1715b2ebe6 100644 --- a/lib/stdlib/test/base64_SUITE.erl +++ b/lib/stdlib/test/base64_SUITE.erl @@ -119,44 +119,51 @@ illegal(Config) when is_list(Config) -> ok. %%------------------------------------------------------------------------- %% mime_decode and mime_decode_to_string have different implementations -%% so test both with the same input separately. Both functions have -%% the same implementation for binary/string arguments. +%% so test both with the same input separately. %% %% Test base64:mime_decode/1. mime_decode(Config) when is_list(Config) -> + MimeDecode = fun(In) -> + Out = base64:mime_decode(In), + Out = base64:mime_decode(binary_to_list(In)) + end, %% Test correct padding - <<"one">> = base64:mime_decode(<<"b25l">>), - <<"on">> = base64:mime_decode(<<"b24=">>), - <<"o">> = base64:mime_decode(<<"bw==">>), + <<"one">> = MimeDecode(<<"b25l">>), + <<"on">> = MimeDecode(<<"b24=">>), + <<"o">> = MimeDecode(<<"bw==">>), %% Test 1 extra padding - <<"one">> = base64:mime_decode(<<"b25l= =">>), - <<"on">> = base64:mime_decode(<<"b24== =">>), - <<"o">> = base64:mime_decode(<<"bw=== =">>), + <<"one">> = MimeDecode(<<"b25l= =">>), + <<"on">> = MimeDecode(<<"b24== =">>), + <<"o">> = MimeDecode(<<"bw=== =">>), %% Test 2 extra padding - <<"one">> = base64:mime_decode(<<"b25l===">>), - <<"on">> = base64:mime_decode(<<"b24====">>), - <<"o">> = base64:mime_decode(<<"bw=====">>), + <<"one">> = MimeDecode(<<"b25l===">>), + <<"on">> = MimeDecode(<<"b24====">>), + <<"o">> = MimeDecode(<<"bw=====">>), %% Test misc embedded padding - <<"one">> = base64:mime_decode(<<"b2=5l===">>), - <<"on">> = base64:mime_decode(<<"b=24====">>), - <<"o">> = base64:mime_decode(<<"b=w=====">>), + <<"one">> = MimeDecode(<<"b2=5l===">>), + <<"on">> = MimeDecode(<<"b=24====">>), + <<"o">> = MimeDecode(<<"b=w=====">>), %% Test misc white space and illegals with embedded padding - <<"one">> = base64:mime_decode(<<" b~2=\r\n5()l===">>), - <<"on">> = base64:mime_decode(<<"\tb =2\"¤4=¤= ==">>), - <<"o">> = base64:mime_decode(<<"\nb=w=====">>), + <<"one">> = MimeDecode(<<" b~2=\r\n5()l===">>), + <<"on">> = MimeDecode(<<"\tb =2\"¤4=¤= ==">>), + <<"o">> = MimeDecode(<<"\nb=w=====">>), %% Two pads <<"Aladdin:open sesame">> = - base64:mime_decode("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="), + MimeDecode(<<"QWxhZGRpbjpvc()GVuIHNlc2FtZQ==">>), %% One pad to ignore, followed by more text - <<"Hello World!!">> = base64:mime_decode(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), + <<"Hello World!!">> = MimeDecode(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), %% No pad <<"Aladdin:open sesam">> = - base64:mime_decode("QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft"), + MimeDecode(<<"QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft">>), %% Encoded base 64 strings may be divided by non base 64 chars. %% In this cases whitespaces. <<"0123456789!@#0^&*();:<>,. []{}">> = - base64:mime_decode( + MimeDecode( <<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>), + %% Zeroes + <<"012">> = MimeDecode(<<"\000M\000D\000E\000y=\000">>), + <<"o">> = MimeDecode(<<"bw==\000">>), + <<"o">> = MimeDecode(<<"bw=\000=">>), ok. %%------------------------------------------------------------------------- @@ -165,39 +172,48 @@ mime_decode(Config) when is_list(Config) -> %% Test base64:mime_decode_to_string/1. mime_decode_to_string(Config) when is_list(Config) -> + MimeDecodeToString = + fun(In) -> + Out = base64:mime_decode_to_string(In), + Out = base64:mime_decode_to_string(binary_to_list(In)) + end, %% Test correct padding - "one" = base64:mime_decode_to_string(<<"b25l">>), - "on" = base64:mime_decode_to_string(<<"b24=">>), - "o" = base64:mime_decode_to_string(<<"bw==">>), + "one" = MimeDecodeToString(<<"b25l">>), + "on" = MimeDecodeToString(<<"b24=">>), + "o" = MimeDecodeToString(<<"bw==">>), %% Test 1 extra padding - "one" = base64:mime_decode_to_string(<<"b25l= =">>), - "on" = base64:mime_decode_to_string(<<"b24== =">>), - "o" = base64:mime_decode_to_string(<<"bw=== =">>), + "one" = MimeDecodeToString(<<"b25l= =">>), + "on" = MimeDecodeToString(<<"b24== =">>), + "o" = MimeDecodeToString(<<"bw=== =">>), %% Test 2 extra padding - "one" = base64:mime_decode_to_string(<<"b25l===">>), - "on" = base64:mime_decode_to_string(<<"b24====">>), - "o" = base64:mime_decode_to_string(<<"bw=====">>), + "one" = MimeDecodeToString(<<"b25l===">>), + "on" = MimeDecodeToString(<<"b24====">>), + "o" = MimeDecodeToString(<<"bw=====">>), %% Test misc embedded padding - "one" = base64:mime_decode_to_string(<<"b2=5l===">>), - "on" = base64:mime_decode_to_string(<<"b=24====">>), - "o" = base64:mime_decode_to_string(<<"b=w=====">>), + "one" = MimeDecodeToString(<<"b2=5l===">>), + "on" = MimeDecodeToString(<<"b=24====">>), + "o" = MimeDecodeToString(<<"b=w=====">>), %% Test misc white space and illegals with embedded padding - "one" = base64:mime_decode_to_string(<<" b~2=\r\n5()l===">>), - "on" = base64:mime_decode_to_string(<<"\tb =2\"¤4=¤= ==">>), - "o" = base64:mime_decode_to_string(<<"\nb=w=====">>), + "one" = MimeDecodeToString(<<" b~2=\r\n5()l===">>), + "on" = MimeDecodeToString(<<"\tb =2\"¤4=¤= ==">>), + "o" = MimeDecodeToString(<<"\nb=w=====">>), %% Two pads "Aladdin:open sesame" = - base64:mime_decode_to_string("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="), + MimeDecodeToString(<<"QWxhZGRpbjpvc()GVuIHNlc2FtZQ==">>), %% One pad to ignore, followed by more text - "Hello World!!" = base64:mime_decode_to_string(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), + "Hello World!!" = MimeDecodeToString(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), %% No pad "Aladdin:open sesam" = - base64:mime_decode_to_string("QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft"), + MimeDecodeToString(<<"QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft">>), %% Encoded base 64 strings may be divided by non base 64 chars. %% In this cases whitespaces. "0123456789!@#0^&*();:<>,. []{}" = - base64:mime_decode_to_string( + MimeDecodeToString( <<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>), + %% Zeroes + "012" = MimeDecodeToString(<<"\000M\000D\000E\000y=\000">>), + "o" = MimeDecodeToString(<<"bw==\000">>), + "o" = MimeDecodeToString(<<"bw=\000=">>), ok. %%------------------------------------------------------------------------- -- cgit v1.2.3