diff options
author | Ingela Anderton Andin <[email protected]> | 2011-01-11 10:11:23 +0100 |
---|---|---|
committer | Ingela Anderton Andin <[email protected]> | 2011-01-11 10:11:43 +0100 |
commit | ab1c28eadc968d8475ce42a97b2ae143590d5cc1 (patch) | |
tree | 21819ccca682ecb1ca45361240a0ef182618c5d7 /lib/stdlib | |
parent | 095d5ebc0f739982b55b95fdd9919a6331b4b2b1 (diff) | |
parent | 105fc6e844319a34df72efe6b2eb4823626a7957 (diff) | |
download | otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.tar.gz otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.tar.bz2 otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.zip |
Merge branch 'td/base64-mime-decoding' into dev
* td/base64-mime-decoding:
Improve pad character handling in base64 MIME decoding functions
OTP-9020
Diffstat (limited to 'lib/stdlib')
-rw-r--r-- | lib/stdlib/src/base64.erl | 111 | ||||
-rw-r--r-- | lib/stdlib/test/base64_SUITE.erl | 86 |
2 files changed, 155 insertions, 42 deletions
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl index ebef998ee1..69d864051c 100644 --- a/lib/stdlib/src/base64.erl +++ b/lib/stdlib/src/base64.erl @@ -114,7 +114,7 @@ decode(List) when is_list(List) -> mime_decode(Bin) when is_binary(Bin) -> mime_decode_binary(<<>>, Bin); mime_decode(List) when is_list(List) -> - list_to_binary(mime_decode_l(List)). + mime_decode(list_to_binary(List)). -spec decode_l(string()) -> string(). @@ -125,7 +125,7 @@ decode_l(List) -> -spec mime_decode_l(string()) -> string(). mime_decode_l(List) -> - L = strip_illegal(List, []), + L = strip_illegal(List, [], 0), decode(L, []). %%------------------------------------------------------------------------- @@ -198,6 +198,9 @@ decode_binary(Result, <<>>) -> true = is_binary(Result), Result. +%% Skipping pad character if not at end of string. Also liberal about +%% excess padding and skipping of other illegal (non-base64 alphabet) +%% characters. See section 3.3 of RFC4648 mime_decode_binary(Result, <<0:8,T/bits>>) -> mime_decode_binary(Result, T); mime_decode_binary(Result0, <<C:8,T/bits>>) -> @@ -205,15 +208,27 @@ mime_decode_binary(Result0, <<C:8,T/bits>>) -> Bits when is_integer(Bits) -> mime_decode_binary(<<Result0/bits,Bits:6>>, T); eq -> - case tail_contains_equal(T) of - true -> - Split = byte_size(Result0) - 1, - <<Result:Split/bytes,_:4>> = Result0, - Result; - false -> - Split = byte_size(Result0) - 1, - <<Result:Split/bytes,_:2>> = Result0, - Result + case tail_contains_more(T, false) of + {<<>>, Eq} -> + %% No more valid data. + case bit_size(Result0) rem 8 of + 0 -> + %% '====' is not uncommon. + Result0; + 4 when Eq -> + %% enforce at least one more '=' only ignoring illegals and spacing + Split = size(Result0), + <<Result:Split/bytes,_:4>> = Result0, + Result; + 2 -> + %% remove 2 bits + Split = size(Result0), + <<Result:Split/bytes,_:2>> = Result0, + Result + end; + {More, _} -> + %% More valid data, skip the eq as invalid + mime_decode_binary(Result0, More) end; _ -> mime_decode_binary(Result0, T) @@ -262,31 +277,63 @@ strip_ws(<<$\s,T/binary>>) -> strip_ws(T); strip_ws(T) -> T. -strip_illegal([0|Cs], A) -> - strip_illegal(Cs, A); -strip_illegal([C|Cs], A) -> +%% Skipping pad character if not at end of string. Also liberal about +%% excess padding and skipping of other illegal (non-base64 alphabet) +%% characters. See section 3.3 of RFC4648 +strip_illegal([], A, _Cnt) -> + A; +strip_illegal([0|Cs], A, Cnt) -> + strip_illegal(Cs, A, Cnt); +strip_illegal([C|Cs], A, Cnt) -> case element(C, ?DECODE_MAP) of - bad -> strip_illegal(Cs, A); - ws -> strip_illegal(Cs, A); - eq -> strip_illegal_end(Cs, [$=|A]); - _ -> strip_illegal(Cs, [C|A]) - end; -strip_illegal([], A) -> A. + bad -> + strip_illegal(Cs, A, Cnt); + ws -> + strip_illegal(Cs, A, Cnt); + eq -> + case {tail_contains_more(Cs, false), Cnt rem 4} of + {{[], _}, 0} -> + A; %% Ignore extra = + {{[], true}, 2} -> + [$=|[$=|A]]; %% 'XX==' + {{[], _}, 3} -> + [$=|A]; %% 'XXX=' + {{[H|T], _}, _} -> + %% more data, skip equals + strip_illegal(T, [H|A], Cnt+1) + end; + _ -> + strip_illegal(Cs, [C|A], Cnt+1) + end. -strip_illegal_end([0|Cs], A) -> - strip_illegal_end(Cs, A); -strip_illegal_end([C|Cs], A) -> +%% Search the tail for more valid data and remember if we saw +%% another equals along the way. +tail_contains_more([], Eq) -> + {[], Eq}; +tail_contains_more(<<>>, Eq) -> + {<<>>, Eq}; +tail_contains_more([C|T]=More, Eq) -> case element(C, ?DECODE_MAP) of - bad -> strip_illegal(Cs, A); - ws -> strip_illegal(Cs, A); - eq -> [C|A]; - _ -> strip_illegal(Cs, [C|A]) + bad -> + tail_contains_more(T, Eq); + ws -> + tail_contains_more(T, Eq); + eq -> + tail_contains_more(T, true); + _ -> + {More, Eq} end; -strip_illegal_end([], A) -> A. - -tail_contains_equal(<<$=,_/binary>>) -> true; -tail_contains_equal(<<_,T/binary>>) -> tail_contains_equal(T); -tail_contains_equal(<<>>) -> false. +tail_contains_more(<<C:8,T/bits>> =More, Eq) -> + case element(C, ?DECODE_MAP) of + bad -> + tail_contains_more(T, Eq); + ws -> + tail_contains_more(T, Eq); + eq -> + tail_contains_more(T, true); + _ -> + {More, Eq} + end. %% accessors b64e(X) -> diff --git a/lib/stdlib/test/base64_SUITE.erl b/lib/stdlib/test/base64_SUITE.erl index 44742063b3..3790e7d4c0 100644 --- a/lib/stdlib/test/base64_SUITE.erl +++ b/lib/stdlib/test/base64_SUITE.erl @@ -29,7 +29,7 @@ %% Test cases must be exported. -export([base64_encode/1, base64_decode/1, base64_otp_5635/1, base64_otp_6279/1, big/1, illegal/1, mime_decode/1, - roundtrip/1]). + mime_decode_to_string/1, roundtrip/1]). init_per_testcase(_, Config) -> Dog = test_server:timetrap(?t:minutes(2)), @@ -50,7 +50,7 @@ all(doc) -> all(suite) -> [base64_encode, base64_decode, base64_otp_5635, base64_otp_6279, big, illegal, mime_decode, - roundtrip]. + mime_decode_to_string, roundtrip]. %%------------------------------------------------------------------------- @@ -59,7 +59,7 @@ base64_encode(doc) -> base64_encode(suite) -> []; base64_encode(Config) when is_list(Config) -> - %% Two pads + %% Two pads <<"QWxhZGRpbjpvcGVuIHNlc2FtZQ==">> = base64:encode("Aladdin:open sesame"), %% One pad @@ -77,8 +77,8 @@ base64_decode(doc) -> base64_decode(suite) -> []; base64_decode(Config) when is_list(Config) -> - %% Two pads - <<"Aladdin:open sesame">> = + %% Two pads + <<"Aladdin:open sesame">> = base64:decode("QWxhZGRpbjpvcGVuIHNlc2FtZQ=="), %% One pad <<"Hello World">> = base64:decode(<<"SGVsbG8gV29ybGQ=">>), @@ -138,20 +138,85 @@ illegal(Config) when is_list(Config) -> {'EXIT',{function_clause, _}} = (catch base64:decode("()")), ok. %%------------------------------------------------------------------------- +%% mime_decode and mime_decode_to_string have different implementations +%% so test both with the same input separately. Both functions have +%% the same implementation for binary/string arguments. mime_decode(doc) -> ["Test base64:mime_decode/1."]; mime_decode(suite) -> []; mime_decode(Config) when is_list(Config) -> - %% Two pads - <<"Aladdin:open sesame">> = + %% Test correct padding + <<"one">> = base64:mime_decode(<<"b25l">>), + <<"on">> = base64:mime_decode(<<"b24=">>), + <<"o">> = base64:mime_decode(<<"bw==">>), + %% Test 1 extra padding + <<"one">> = base64:mime_decode(<<"b25l= =">>), + <<"on">> = base64:mime_decode(<<"b24== =">>), + <<"o">> = base64:mime_decode(<<"bw=== =">>), + %% Test 2 extra padding + <<"one">> = base64:mime_decode(<<"b25l===">>), + <<"on">> = base64:mime_decode(<<"b24====">>), + <<"o">> = base64:mime_decode(<<"bw=====">>), + %% Test misc embedded padding + <<"one">> = base64:mime_decode(<<"b2=5l===">>), + <<"on">> = base64:mime_decode(<<"b=24====">>), + <<"o">> = base64:mime_decode(<<"b=w=====">>), + %% Test misc white space and illegals with embedded padding + <<"one">> = base64:mime_decode(<<" b~2=\r\n5()l===">>), + <<"on">> = base64:mime_decode(<<"\tb =2\"�4=�= ==">>), + <<"o">> = base64:mime_decode(<<"\nb=w=====">>), + %% Two pads + <<"Aladdin:open sesame">> = base64:mime_decode("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="), - %% One pad, followed by ignored text - <<"Hello World">> = base64:mime_decode(<<"SGVsb)(G8gV29ybGQ=apa">>), + %% One pad to ignore, followed by more text + <<"Hello World!!">> = base64:mime_decode(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), + %% No pad + <<"Aladdin:open sesam">> = + base64:mime_decode("QWxhZGRpbjpvcG�\")(VuIHNlc2Ft"), + %% Encoded base 64 strings may be divided by non base 64 chars. + %% In this cases whitespaces. + <<"0123456789!@#0^&*();:<>,. []{}">> = + base64:mime_decode( + <<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>), + ok. + +%%------------------------------------------------------------------------- + +%% Repeat of mime_decode() tests +mime_decode_to_string(doc) -> + ["Test base64:mime_decode_to_string/1."]; +mime_decode_to_string(suite) -> + []; +mime_decode_to_string(Config) when is_list(Config) -> + %% Test correct padding + "one" = base64:mime_decode_to_string(<<"b25l">>), + "on" = base64:mime_decode_to_string(<<"b24=">>), + "o" = base64:mime_decode_to_string(<<"bw==">>), + %% Test 1 extra padding + "one" = base64:mime_decode_to_string(<<"b25l= =">>), + "on" = base64:mime_decode_to_string(<<"b24== =">>), + "o" = base64:mime_decode_to_string(<<"bw=== =">>), + %% Test 2 extra padding + "one" = base64:mime_decode_to_string(<<"b25l===">>), + "on" = base64:mime_decode_to_string(<<"b24====">>), + "o" = base64:mime_decode_to_string(<<"bw=====">>), + %% Test misc embedded padding + "one" = base64:mime_decode_to_string(<<"b2=5l===">>), + "on" = base64:mime_decode_to_string(<<"b=24====">>), + "o" = base64:mime_decode_to_string(<<"b=w=====">>), + %% Test misc white space and illegals with embedded padding + "one" = base64:mime_decode_to_string(<<" b~2=\r\n5()l===">>), + "on" = base64:mime_decode_to_string(<<"\tb =2\"�4=�= ==">>), + "o" = base64:mime_decode_to_string(<<"\nb=w=====">>), + %% Two pads + "Aladdin:open sesame" = + base64:mime_decode_to_string("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="), + %% One pad to ignore, followed by more text + "Hello World!!" = base64:mime_decode_to_string(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), %% No pad "Aladdin:open sesam" = base64:mime_decode_to_string("QWxhZGRpbjpvcG�\")(VuIHNlc2Ft"), - %% Encoded base 64 strings may be divided by non base 64 chars. %% In this cases whitespaces. "0123456789!@#0^&*();:<>,. []{}" = @@ -159,6 +224,7 @@ mime_decode(Config) when is_list(Config) -> <<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>), ok. +%%------------------------------------------------------------------------- roundtrip(Config) when is_list(Config) -> Sizes = lists:seq(1, 255) ++ lists:seq(2400-5, 2440), |