diff options
author | Thomas O'Dowd <tpodowd@geminimobile.com> | 2010-12-07 10:58:17 +0900 |
---|---|---|
committer | Thomas O'Dowd <tpodowd@geminimobile.com> | 2010-12-07 10:58:17 +0900 |
commit | 105fc6e844319a34df72efe6b2eb4823626a7957 (patch) | |
tree | 122f4abb07b74c9e419acf591a44581fc543d739 | |
parent | 74ec1253e1a89f0bb266831f73716a24c0798dc1 (diff) | |
download | otp-105fc6e844319a34df72efe6b2eb4823626a7957.tar.gz otp-105fc6e844319a34df72efe6b2eb4823626a7957.tar.bz2 otp-105fc6e844319a34df72efe6b2eb4823626a7957.zip |
Improve pad character handling in base64 MIME decoding functions
Implement the 'MAY' clauses from RFC4648 regarding the pad character
to make mime_decode() and mime_decode_to_string() functions more
tolerant of badly padded base64. The RFC is quoted below for easy
reference.
RFC4648 Section 3.3 with reference to MIME decoding:
Furthermore, such specifications MAY ignore the pad character, "=",
treating it as non-alphabet data, if it is present before the end of
the encoded data. If more than the allowed number of pad characters
is found at the end of the string (e.g., a base 64 string terminated
with "==="), the excess pad characters MAY also be ignored.
-rw-r--r-- | lib/stdlib/src/base64.erl | 111 | ||||
-rw-r--r-- | lib/stdlib/test/base64_SUITE.erl | 86 |
2 files changed, 155 insertions, 42 deletions
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl index ebef998ee1..69d864051c 100644 --- a/lib/stdlib/src/base64.erl +++ b/lib/stdlib/src/base64.erl @@ -114,7 +114,7 @@ decode(List) when is_list(List) -> mime_decode(Bin) when is_binary(Bin) -> mime_decode_binary(<<>>, Bin); mime_decode(List) when is_list(List) -> - list_to_binary(mime_decode_l(List)). + mime_decode(list_to_binary(List)). -spec decode_l(string()) -> string(). @@ -125,7 +125,7 @@ decode_l(List) -> -spec mime_decode_l(string()) -> string(). mime_decode_l(List) -> - L = strip_illegal(List, []), + L = strip_illegal(List, [], 0), decode(L, []). %%------------------------------------------------------------------------- @@ -198,6 +198,9 @@ decode_binary(Result, <<>>) -> true = is_binary(Result), Result. +%% Skipping pad character if not at end of string. Also liberal about +%% excess padding and skipping of other illegal (non-base64 alphabet) +%% characters. See section 3.3 of RFC4648 mime_decode_binary(Result, <<0:8,T/bits>>) -> mime_decode_binary(Result, T); mime_decode_binary(Result0, <<C:8,T/bits>>) -> @@ -205,15 +208,27 @@ mime_decode_binary(Result0, <<C:8,T/bits>>) -> Bits when is_integer(Bits) -> mime_decode_binary(<<Result0/bits,Bits:6>>, T); eq -> - case tail_contains_equal(T) of - true -> - Split = byte_size(Result0) - 1, - <<Result:Split/bytes,_:4>> = Result0, - Result; - false -> - Split = byte_size(Result0) - 1, - <<Result:Split/bytes,_:2>> = Result0, - Result + case tail_contains_more(T, false) of + {<<>>, Eq} -> + %% No more valid data. + case bit_size(Result0) rem 8 of + 0 -> + %% '====' is not uncommon. + Result0; + 4 when Eq -> + %% enforce at least one more '=' only ignoring illegals and spacing + Split = size(Result0), + <<Result:Split/bytes,_:4>> = Result0, + Result; + 2 -> + %% remove 2 bits + Split = size(Result0), + <<Result:Split/bytes,_:2>> = Result0, + Result + end; + {More, _} -> + %% More valid data, skip the eq as invalid + mime_decode_binary(Result0, More) end; _ -> mime_decode_binary(Result0, T) @@ -262,31 +277,63 @@ strip_ws(<<$\s,T/binary>>) -> strip_ws(T); strip_ws(T) -> T. -strip_illegal([0|Cs], A) -> - strip_illegal(Cs, A); -strip_illegal([C|Cs], A) -> +%% Skipping pad character if not at end of string. Also liberal about +%% excess padding and skipping of other illegal (non-base64 alphabet) +%% characters. See section 3.3 of RFC4648 +strip_illegal([], A, _Cnt) -> + A; +strip_illegal([0|Cs], A, Cnt) -> + strip_illegal(Cs, A, Cnt); +strip_illegal([C|Cs], A, Cnt) -> case element(C, ?DECODE_MAP) of - bad -> strip_illegal(Cs, A); - ws -> strip_illegal(Cs, A); - eq -> strip_illegal_end(Cs, [$=|A]); - _ -> strip_illegal(Cs, [C|A]) - end; -strip_illegal([], A) -> A. + bad -> + strip_illegal(Cs, A, Cnt); + ws -> + strip_illegal(Cs, A, Cnt); + eq -> + case {tail_contains_more(Cs, false), Cnt rem 4} of + {{[], _}, 0} -> + A; %% Ignore extra = + {{[], true}, 2} -> + [$=|[$=|A]]; %% 'XX==' + {{[], _}, 3} -> + [$=|A]; %% 'XXX=' + {{[H|T], _}, _} -> + %% more data, skip equals + strip_illegal(T, [H|A], Cnt+1) + end; + _ -> + strip_illegal(Cs, [C|A], Cnt+1) + end. -strip_illegal_end([0|Cs], A) -> - strip_illegal_end(Cs, A); -strip_illegal_end([C|Cs], A) -> +%% Search the tail for more valid data and remember if we saw +%% another equals along the way. +tail_contains_more([], Eq) -> + {[], Eq}; +tail_contains_more(<<>>, Eq) -> + {<<>>, Eq}; +tail_contains_more([C|T]=More, Eq) -> case element(C, ?DECODE_MAP) of - bad -> strip_illegal(Cs, A); - ws -> strip_illegal(Cs, A); - eq -> [C|A]; - _ -> strip_illegal(Cs, [C|A]) + bad -> + tail_contains_more(T, Eq); + ws -> + tail_contains_more(T, Eq); + eq -> + tail_contains_more(T, true); + _ -> + {More, Eq} end; -strip_illegal_end([], A) -> A. - -tail_contains_equal(<<$=,_/binary>>) -> true; -tail_contains_equal(<<_,T/binary>>) -> tail_contains_equal(T); -tail_contains_equal(<<>>) -> false. +tail_contains_more(<<C:8,T/bits>> =More, Eq) -> + case element(C, ?DECODE_MAP) of + bad -> + tail_contains_more(T, Eq); + ws -> + tail_contains_more(T, Eq); + eq -> + tail_contains_more(T, true); + _ -> + {More, Eq} + end. %% accessors b64e(X) -> diff --git a/lib/stdlib/test/base64_SUITE.erl b/lib/stdlib/test/base64_SUITE.erl index 44742063b3..3790e7d4c0 100644 --- a/lib/stdlib/test/base64_SUITE.erl +++ b/lib/stdlib/test/base64_SUITE.erl @@ -29,7 +29,7 @@ %% Test cases must be exported. -export([base64_encode/1, base64_decode/1, base64_otp_5635/1, base64_otp_6279/1, big/1, illegal/1, mime_decode/1, - roundtrip/1]). + mime_decode_to_string/1, roundtrip/1]). init_per_testcase(_, Config) -> Dog = test_server:timetrap(?t:minutes(2)), @@ -50,7 +50,7 @@ all(doc) -> all(suite) -> [base64_encode, base64_decode, base64_otp_5635, base64_otp_6279, big, illegal, mime_decode, - roundtrip]. + mime_decode_to_string, roundtrip]. %%------------------------------------------------------------------------- @@ -59,7 +59,7 @@ base64_encode(doc) -> base64_encode(suite) -> []; base64_encode(Config) when is_list(Config) -> - %% Two pads + %% Two pads <<"QWxhZGRpbjpvcGVuIHNlc2FtZQ==">> = base64:encode("Aladdin:open sesame"), %% One pad @@ -77,8 +77,8 @@ base64_decode(doc) -> base64_decode(suite) -> []; base64_decode(Config) when is_list(Config) -> - %% Two pads - <<"Aladdin:open sesame">> = + %% Two pads + <<"Aladdin:open sesame">> = base64:decode("QWxhZGRpbjpvcGVuIHNlc2FtZQ=="), %% One pad <<"Hello World">> = base64:decode(<<"SGVsbG8gV29ybGQ=">>), @@ -138,20 +138,85 @@ illegal(Config) when is_list(Config) -> {'EXIT',{function_clause, _}} = (catch base64:decode("()")), ok. %%------------------------------------------------------------------------- +%% mime_decode and mime_decode_to_string have different implementations +%% so test both with the same input separately. Both functions have +%% the same implementation for binary/string arguments. mime_decode(doc) -> ["Test base64:mime_decode/1."]; mime_decode(suite) -> []; mime_decode(Config) when is_list(Config) -> - %% Two pads - <<"Aladdin:open sesame">> = + %% Test correct padding + <<"one">> = base64:mime_decode(<<"b25l">>), + <<"on">> = base64:mime_decode(<<"b24=">>), + <<"o">> = base64:mime_decode(<<"bw==">>), + %% Test 1 extra padding + <<"one">> = base64:mime_decode(<<"b25l= =">>), + <<"on">> = base64:mime_decode(<<"b24== =">>), + <<"o">> = base64:mime_decode(<<"bw=== =">>), + %% Test 2 extra padding + <<"one">> = base64:mime_decode(<<"b25l===">>), + <<"on">> = base64:mime_decode(<<"b24====">>), + <<"o">> = base64:mime_decode(<<"bw=====">>), + %% Test misc embedded padding + <<"one">> = base64:mime_decode(<<"b2=5l===">>), + <<"on">> = base64:mime_decode(<<"b=24====">>), + <<"o">> = base64:mime_decode(<<"b=w=====">>), + %% Test misc white space and illegals with embedded padding + <<"one">> = base64:mime_decode(<<" b~2=\r\n5()l===">>), + <<"on">> = base64:mime_decode(<<"\tb =2\"�4=�= ==">>), + <<"o">> = base64:mime_decode(<<"\nb=w=====">>), + %% Two pads + <<"Aladdin:open sesame">> = base64:mime_decode("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="), - %% One pad, followed by ignored text - <<"Hello World">> = base64:mime_decode(<<"SGVsb)(G8gV29ybGQ=apa">>), + %% One pad to ignore, followed by more text + <<"Hello World!!">> = base64:mime_decode(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), + %% No pad + <<"Aladdin:open sesam">> = + base64:mime_decode("QWxhZGRpbjpvcG�\")(VuIHNlc2Ft"), + %% Encoded base 64 strings may be divided by non base 64 chars. + %% In this cases whitespaces. + <<"0123456789!@#0^&*();:<>,. []{}">> = + base64:mime_decode( + <<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>), + ok. + +%%------------------------------------------------------------------------- + +%% Repeat of mime_decode() tests +mime_decode_to_string(doc) -> + ["Test base64:mime_decode_to_string/1."]; +mime_decode_to_string(suite) -> + []; +mime_decode_to_string(Config) when is_list(Config) -> + %% Test correct padding + "one" = base64:mime_decode_to_string(<<"b25l">>), + "on" = base64:mime_decode_to_string(<<"b24=">>), + "o" = base64:mime_decode_to_string(<<"bw==">>), + %% Test 1 extra padding + "one" = base64:mime_decode_to_string(<<"b25l= =">>), + "on" = base64:mime_decode_to_string(<<"b24== =">>), + "o" = base64:mime_decode_to_string(<<"bw=== =">>), + %% Test 2 extra padding + "one" = base64:mime_decode_to_string(<<"b25l===">>), + "on" = base64:mime_decode_to_string(<<"b24====">>), + "o" = base64:mime_decode_to_string(<<"bw=====">>), + %% Test misc embedded padding + "one" = base64:mime_decode_to_string(<<"b2=5l===">>), + "on" = base64:mime_decode_to_string(<<"b=24====">>), + "o" = base64:mime_decode_to_string(<<"b=w=====">>), + %% Test misc white space and illegals with embedded padding + "one" = base64:mime_decode_to_string(<<" b~2=\r\n5()l===">>), + "on" = base64:mime_decode_to_string(<<"\tb =2\"�4=�= ==">>), + "o" = base64:mime_decode_to_string(<<"\nb=w=====">>), + %% Two pads + "Aladdin:open sesame" = + base64:mime_decode_to_string("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="), + %% One pad to ignore, followed by more text + "Hello World!!" = base64:mime_decode_to_string(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), %% No pad "Aladdin:open sesam" = base64:mime_decode_to_string("QWxhZGRpbjpvcG�\")(VuIHNlc2Ft"), - %% Encoded base 64 strings may be divided by non base 64 chars. %% In this cases whitespaces. "0123456789!@#0^&*();:<>,. []{}" = @@ -159,6 +224,7 @@ mime_decode(Config) when is_list(Config) -> <<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>), ok. +%%------------------------------------------------------------------------- roundtrip(Config) when is_list(Config) -> Sizes = lists:seq(1, 255) ++ lists:seq(2400-5, 2440), |