aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib
diff options
context:
space:
mode:
authorIngela Anderton Andin <[email protected]>2011-01-11 10:11:23 +0100
committerIngela Anderton Andin <[email protected]>2011-01-11 10:11:43 +0100
commitab1c28eadc968d8475ce42a97b2ae143590d5cc1 (patch)
tree21819ccca682ecb1ca45361240a0ef182618c5d7 /lib/stdlib
parent095d5ebc0f739982b55b95fdd9919a6331b4b2b1 (diff)
parent105fc6e844319a34df72efe6b2eb4823626a7957 (diff)
downloadotp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.tar.gz
otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.tar.bz2
otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.zip
Merge branch 'td/base64-mime-decoding' into dev
* td/base64-mime-decoding: Improve pad character handling in base64 MIME decoding functions OTP-9020
Diffstat (limited to 'lib/stdlib')
-rw-r--r--lib/stdlib/src/base64.erl111
-rw-r--r--lib/stdlib/test/base64_SUITE.erl86
2 files changed, 155 insertions, 42 deletions
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl
index ebef998ee1..69d864051c 100644
--- a/lib/stdlib/src/base64.erl
+++ b/lib/stdlib/src/base64.erl
@@ -114,7 +114,7 @@ decode(List) when is_list(List) ->
mime_decode(Bin) when is_binary(Bin) ->
mime_decode_binary(<<>>, Bin);
mime_decode(List) when is_list(List) ->
- list_to_binary(mime_decode_l(List)).
+ mime_decode(list_to_binary(List)).
-spec decode_l(string()) -> string().
@@ -125,7 +125,7 @@ decode_l(List) ->
-spec mime_decode_l(string()) -> string().
mime_decode_l(List) ->
- L = strip_illegal(List, []),
+ L = strip_illegal(List, [], 0),
decode(L, []).
%%-------------------------------------------------------------------------
@@ -198,6 +198,9 @@ decode_binary(Result, <<>>) ->
true = is_binary(Result),
Result.
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
mime_decode_binary(Result, <<0:8,T/bits>>) ->
mime_decode_binary(Result, T);
mime_decode_binary(Result0, <<C:8,T/bits>>) ->
@@ -205,15 +208,27 @@ mime_decode_binary(Result0, <<C:8,T/bits>>) ->
Bits when is_integer(Bits) ->
mime_decode_binary(<<Result0/bits,Bits:6>>, T);
eq ->
- case tail_contains_equal(T) of
- true ->
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:4>> = Result0,
- Result;
- false ->
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:2>> = Result0,
- Result
+ case tail_contains_more(T, false) of
+ {<<>>, Eq} ->
+ %% No more valid data.
+ case bit_size(Result0) rem 8 of
+ 0 ->
+ %% '====' is not uncommon.
+ Result0;
+ 4 when Eq ->
+ %% enforce at least one more '=' only ignoring illegals and spacing
+ Split = size(Result0),
+ <<Result:Split/bytes,_:4>> = Result0,
+ Result;
+ 2 ->
+ %% remove 2 bits
+ Split = size(Result0),
+ <<Result:Split/bytes,_:2>> = Result0,
+ Result
+ end;
+ {More, _} ->
+ %% More valid data, skip the eq as invalid
+ mime_decode_binary(Result0, More)
end;
_ ->
mime_decode_binary(Result0, T)
@@ -262,31 +277,63 @@ strip_ws(<<$\s,T/binary>>) ->
strip_ws(T);
strip_ws(T) -> T.
-strip_illegal([0|Cs], A) ->
- strip_illegal(Cs, A);
-strip_illegal([C|Cs], A) ->
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
+strip_illegal([], A, _Cnt) ->
+ A;
+strip_illegal([0|Cs], A, Cnt) ->
+ strip_illegal(Cs, A, Cnt);
+strip_illegal([C|Cs], A, Cnt) ->
case element(C, ?DECODE_MAP) of
- bad -> strip_illegal(Cs, A);
- ws -> strip_illegal(Cs, A);
- eq -> strip_illegal_end(Cs, [$=|A]);
- _ -> strip_illegal(Cs, [C|A])
- end;
-strip_illegal([], A) -> A.
+ bad ->
+ strip_illegal(Cs, A, Cnt);
+ ws ->
+ strip_illegal(Cs, A, Cnt);
+ eq ->
+ case {tail_contains_more(Cs, false), Cnt rem 4} of
+ {{[], _}, 0} ->
+ A; %% Ignore extra =
+ {{[], true}, 2} ->
+ [$=|[$=|A]]; %% 'XX=='
+ {{[], _}, 3} ->
+ [$=|A]; %% 'XXX='
+ {{[H|T], _}, _} ->
+ %% more data, skip equals
+ strip_illegal(T, [H|A], Cnt+1)
+ end;
+ _ ->
+ strip_illegal(Cs, [C|A], Cnt+1)
+ end.
-strip_illegal_end([0|Cs], A) ->
- strip_illegal_end(Cs, A);
-strip_illegal_end([C|Cs], A) ->
+%% Search the tail for more valid data and remember if we saw
+%% another equals along the way.
+tail_contains_more([], Eq) ->
+ {[], Eq};
+tail_contains_more(<<>>, Eq) ->
+ {<<>>, Eq};
+tail_contains_more([C|T]=More, Eq) ->
case element(C, ?DECODE_MAP) of
- bad -> strip_illegal(Cs, A);
- ws -> strip_illegal(Cs, A);
- eq -> [C|A];
- _ -> strip_illegal(Cs, [C|A])
+ bad ->
+ tail_contains_more(T, Eq);
+ ws ->
+ tail_contains_more(T, Eq);
+ eq ->
+ tail_contains_more(T, true);
+ _ ->
+ {More, Eq}
end;
-strip_illegal_end([], A) -> A.
-
-tail_contains_equal(<<$=,_/binary>>) -> true;
-tail_contains_equal(<<_,T/binary>>) -> tail_contains_equal(T);
-tail_contains_equal(<<>>) -> false.
+tail_contains_more(<<C:8,T/bits>> =More, Eq) ->
+ case element(C, ?DECODE_MAP) of
+ bad ->
+ tail_contains_more(T, Eq);
+ ws ->
+ tail_contains_more(T, Eq);
+ eq ->
+ tail_contains_more(T, true);
+ _ ->
+ {More, Eq}
+ end.
%% accessors
b64e(X) ->
diff --git a/lib/stdlib/test/base64_SUITE.erl b/lib/stdlib/test/base64_SUITE.erl
index 44742063b3..3790e7d4c0 100644
--- a/lib/stdlib/test/base64_SUITE.erl
+++ b/lib/stdlib/test/base64_SUITE.erl
@@ -29,7 +29,7 @@
%% Test cases must be exported.
-export([base64_encode/1, base64_decode/1, base64_otp_5635/1,
base64_otp_6279/1, big/1, illegal/1, mime_decode/1,
- roundtrip/1]).
+ mime_decode_to_string/1, roundtrip/1]).
init_per_testcase(_, Config) ->
Dog = test_server:timetrap(?t:minutes(2)),
@@ -50,7 +50,7 @@ all(doc) ->
all(suite) ->
[base64_encode, base64_decode, base64_otp_5635,
base64_otp_6279, big, illegal, mime_decode,
- roundtrip].
+ mime_decode_to_string, roundtrip].
%%-------------------------------------------------------------------------
@@ -59,7 +59,7 @@ base64_encode(doc) ->
base64_encode(suite) ->
[];
base64_encode(Config) when is_list(Config) ->
- %% Two pads
+ %% Two pads
<<"QWxhZGRpbjpvcGVuIHNlc2FtZQ==">> =
base64:encode("Aladdin:open sesame"),
%% One pad
@@ -77,8 +77,8 @@ base64_decode(doc) ->
base64_decode(suite) ->
[];
base64_decode(Config) when is_list(Config) ->
- %% Two pads
- <<"Aladdin:open sesame">> =
+ %% Two pads
+ <<"Aladdin:open sesame">> =
base64:decode("QWxhZGRpbjpvcGVuIHNlc2FtZQ=="),
%% One pad
<<"Hello World">> = base64:decode(<<"SGVsbG8gV29ybGQ=">>),
@@ -138,20 +138,85 @@ illegal(Config) when is_list(Config) ->
{'EXIT',{function_clause, _}} = (catch base64:decode("()")),
ok.
%%-------------------------------------------------------------------------
+%% mime_decode and mime_decode_to_string have different implementations
+%% so test both with the same input separately. Both functions have
+%% the same implementation for binary/string arguments.
mime_decode(doc) ->
["Test base64:mime_decode/1."];
mime_decode(suite) ->
[];
mime_decode(Config) when is_list(Config) ->
- %% Two pads
- <<"Aladdin:open sesame">> =
+ %% Test correct padding
+ <<"one">> = base64:mime_decode(<<"b25l">>),
+ <<"on">> = base64:mime_decode(<<"b24=">>),
+ <<"o">> = base64:mime_decode(<<"bw==">>),
+ %% Test 1 extra padding
+ <<"one">> = base64:mime_decode(<<"b25l= =">>),
+ <<"on">> = base64:mime_decode(<<"b24== =">>),
+ <<"o">> = base64:mime_decode(<<"bw=== =">>),
+ %% Test 2 extra padding
+ <<"one">> = base64:mime_decode(<<"b25l===">>),
+ <<"on">> = base64:mime_decode(<<"b24====">>),
+ <<"o">> = base64:mime_decode(<<"bw=====">>),
+ %% Test misc embedded padding
+ <<"one">> = base64:mime_decode(<<"b2=5l===">>),
+ <<"on">> = base64:mime_decode(<<"b=24====">>),
+ <<"o">> = base64:mime_decode(<<"b=w=====">>),
+ %% Test misc white space and illegals with embedded padding
+ <<"one">> = base64:mime_decode(<<" b~2=\r\n5()l===">>),
+ <<"on">> = base64:mime_decode(<<"\tb =2\"�4=�= ==">>),
+ <<"o">> = base64:mime_decode(<<"\nb=w=====">>),
+ %% Two pads
+ <<"Aladdin:open sesame">> =
base64:mime_decode("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="),
- %% One pad, followed by ignored text
- <<"Hello World">> = base64:mime_decode(<<"SGVsb)(G8gV29ybGQ=apa">>),
+ %% One pad to ignore, followed by more text
+ <<"Hello World!!">> = base64:mime_decode(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>),
+ %% No pad
+ <<"Aladdin:open sesam">> =
+ base64:mime_decode("QWxhZGRpbjpvcG�\")(VuIHNlc2Ft"),
+ %% Encoded base 64 strings may be divided by non base 64 chars.
+ %% In this cases whitespaces.
+ <<"0123456789!@#0^&*();:<>,. []{}">> =
+ base64:mime_decode(
+ <<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>),
+ ok.
+
+%%-------------------------------------------------------------------------
+
+%% Repeat of mime_decode() tests
+mime_decode_to_string(doc) ->
+ ["Test base64:mime_decode_to_string/1."];
+mime_decode_to_string(suite) ->
+ [];
+mime_decode_to_string(Config) when is_list(Config) ->
+ %% Test correct padding
+ "one" = base64:mime_decode_to_string(<<"b25l">>),
+ "on" = base64:mime_decode_to_string(<<"b24=">>),
+ "o" = base64:mime_decode_to_string(<<"bw==">>),
+ %% Test 1 extra padding
+ "one" = base64:mime_decode_to_string(<<"b25l= =">>),
+ "on" = base64:mime_decode_to_string(<<"b24== =">>),
+ "o" = base64:mime_decode_to_string(<<"bw=== =">>),
+ %% Test 2 extra padding
+ "one" = base64:mime_decode_to_string(<<"b25l===">>),
+ "on" = base64:mime_decode_to_string(<<"b24====">>),
+ "o" = base64:mime_decode_to_string(<<"bw=====">>),
+ %% Test misc embedded padding
+ "one" = base64:mime_decode_to_string(<<"b2=5l===">>),
+ "on" = base64:mime_decode_to_string(<<"b=24====">>),
+ "o" = base64:mime_decode_to_string(<<"b=w=====">>),
+ %% Test misc white space and illegals with embedded padding
+ "one" = base64:mime_decode_to_string(<<" b~2=\r\n5()l===">>),
+ "on" = base64:mime_decode_to_string(<<"\tb =2\"�4=�= ==">>),
+ "o" = base64:mime_decode_to_string(<<"\nb=w=====">>),
+ %% Two pads
+ "Aladdin:open sesame" =
+ base64:mime_decode_to_string("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="),
+ %% One pad to ignore, followed by more text
+ "Hello World!!" = base64:mime_decode_to_string(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>),
%% No pad
"Aladdin:open sesam" =
base64:mime_decode_to_string("QWxhZGRpbjpvcG�\")(VuIHNlc2Ft"),
-
%% Encoded base 64 strings may be divided by non base 64 chars.
%% In this cases whitespaces.
"0123456789!@#0^&*();:<>,. []{}" =
@@ -159,6 +224,7 @@ mime_decode(Config) when is_list(Config) ->
<<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>),
ok.
+%%-------------------------------------------------------------------------
roundtrip(Config) when is_list(Config) ->
Sizes = lists:seq(1, 255) ++ lists:seq(2400-5, 2440),