aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas O'Dowd <tpodowd@geminimobile.com>2010-12-07 10:58:17 +0900
committerThomas O'Dowd <tpodowd@geminimobile.com>2010-12-07 10:58:17 +0900
commit105fc6e844319a34df72efe6b2eb4823626a7957 (patch)
tree122f4abb07b74c9e419acf591a44581fc543d739
parent74ec1253e1a89f0bb266831f73716a24c0798dc1 (diff)
downloadotp-105fc6e844319a34df72efe6b2eb4823626a7957.tar.gz
otp-105fc6e844319a34df72efe6b2eb4823626a7957.tar.bz2
otp-105fc6e844319a34df72efe6b2eb4823626a7957.zip
Improve pad character handling in base64 MIME decoding functions
Implement the 'MAY' clauses from RFC4648 regarding the pad character to make mime_decode() and mime_decode_to_string() functions more tolerant of badly padded base64. The RFC is quoted below for easy reference. RFC4648 Section 3.3 with reference to MIME decoding: Furthermore, such specifications MAY ignore the pad character, "=", treating it as non-alphabet data, if it is present before the end of the encoded data. If more than the allowed number of pad characters is found at the end of the string (e.g., a base 64 string terminated with "==="), the excess pad characters MAY also be ignored.
-rw-r--r--lib/stdlib/src/base64.erl111
-rw-r--r--lib/stdlib/test/base64_SUITE.erl86
2 files changed, 155 insertions, 42 deletions
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl
index ebef998ee1..69d864051c 100644
--- a/lib/stdlib/src/base64.erl
+++ b/lib/stdlib/src/base64.erl
@@ -114,7 +114,7 @@ decode(List) when is_list(List) ->
mime_decode(Bin) when is_binary(Bin) ->
mime_decode_binary(<<>>, Bin);
mime_decode(List) when is_list(List) ->
- list_to_binary(mime_decode_l(List)).
+ mime_decode(list_to_binary(List)).
-spec decode_l(string()) -> string().
@@ -125,7 +125,7 @@ decode_l(List) ->
-spec mime_decode_l(string()) -> string().
mime_decode_l(List) ->
- L = strip_illegal(List, []),
+ L = strip_illegal(List, [], 0),
decode(L, []).
%%-------------------------------------------------------------------------
@@ -198,6 +198,9 @@ decode_binary(Result, <<>>) ->
true = is_binary(Result),
Result.
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
mime_decode_binary(Result, <<0:8,T/bits>>) ->
mime_decode_binary(Result, T);
mime_decode_binary(Result0, <<C:8,T/bits>>) ->
@@ -205,15 +208,27 @@ mime_decode_binary(Result0, <<C:8,T/bits>>) ->
Bits when is_integer(Bits) ->
mime_decode_binary(<<Result0/bits,Bits:6>>, T);
eq ->
- case tail_contains_equal(T) of
- true ->
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:4>> = Result0,
- Result;
- false ->
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:2>> = Result0,
- Result
+ case tail_contains_more(T, false) of
+ {<<>>, Eq} ->
+ %% No more valid data.
+ case bit_size(Result0) rem 8 of
+ 0 ->
+ %% '====' is not uncommon.
+ Result0;
+ 4 when Eq ->
+ %% enforce at least one more '=' only ignoring illegals and spacing
+ Split = size(Result0),
+ <<Result:Split/bytes,_:4>> = Result0,
+ Result;
+ 2 ->
+ %% remove 2 bits
+ Split = size(Result0),
+ <<Result:Split/bytes,_:2>> = Result0,
+ Result
+ end;
+ {More, _} ->
+ %% More valid data, skip the eq as invalid
+ mime_decode_binary(Result0, More)
end;
_ ->
mime_decode_binary(Result0, T)
@@ -262,31 +277,63 @@ strip_ws(<<$\s,T/binary>>) ->
strip_ws(T);
strip_ws(T) -> T.
-strip_illegal([0|Cs], A) ->
- strip_illegal(Cs, A);
-strip_illegal([C|Cs], A) ->
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
+strip_illegal([], A, _Cnt) ->
+ A;
+strip_illegal([0|Cs], A, Cnt) ->
+ strip_illegal(Cs, A, Cnt);
+strip_illegal([C|Cs], A, Cnt) ->
case element(C, ?DECODE_MAP) of
- bad -> strip_illegal(Cs, A);
- ws -> strip_illegal(Cs, A);
- eq -> strip_illegal_end(Cs, [$=|A]);
- _ -> strip_illegal(Cs, [C|A])
- end;
-strip_illegal([], A) -> A.
+ bad ->
+ strip_illegal(Cs, A, Cnt);
+ ws ->
+ strip_illegal(Cs, A, Cnt);
+ eq ->
+ case {tail_contains_more(Cs, false), Cnt rem 4} of
+ {{[], _}, 0} ->
+ A; %% Ignore extra =
+ {{[], true}, 2} ->
+ [$=|[$=|A]]; %% 'XX=='
+ {{[], _}, 3} ->
+ [$=|A]; %% 'XXX='
+ {{[H|T], _}, _} ->
+ %% more data, skip equals
+ strip_illegal(T, [H|A], Cnt+1)
+ end;
+ _ ->
+ strip_illegal(Cs, [C|A], Cnt+1)
+ end.
-strip_illegal_end([0|Cs], A) ->
- strip_illegal_end(Cs, A);
-strip_illegal_end([C|Cs], A) ->
+%% Search the tail for more valid data and remember if we saw
+%% another equals along the way.
+tail_contains_more([], Eq) ->
+ {[], Eq};
+tail_contains_more(<<>>, Eq) ->
+ {<<>>, Eq};
+tail_contains_more([C|T]=More, Eq) ->
case element(C, ?DECODE_MAP) of
- bad -> strip_illegal(Cs, A);
- ws -> strip_illegal(Cs, A);
- eq -> [C|A];
- _ -> strip_illegal(Cs, [C|A])
+ bad ->
+ tail_contains_more(T, Eq);
+ ws ->
+ tail_contains_more(T, Eq);
+ eq ->
+ tail_contains_more(T, true);
+ _ ->
+ {More, Eq}
end;
-strip_illegal_end([], A) -> A.
-
-tail_contains_equal(<<$=,_/binary>>) -> true;
-tail_contains_equal(<<_,T/binary>>) -> tail_contains_equal(T);
-tail_contains_equal(<<>>) -> false.
+tail_contains_more(<<C:8,T/bits>> =More, Eq) ->
+ case element(C, ?DECODE_MAP) of
+ bad ->
+ tail_contains_more(T, Eq);
+ ws ->
+ tail_contains_more(T, Eq);
+ eq ->
+ tail_contains_more(T, true);
+ _ ->
+ {More, Eq}
+ end.
%% accessors
b64e(X) ->
diff --git a/lib/stdlib/test/base64_SUITE.erl b/lib/stdlib/test/base64_SUITE.erl
index 44742063b3..3790e7d4c0 100644
--- a/lib/stdlib/test/base64_SUITE.erl
+++ b/lib/stdlib/test/base64_SUITE.erl
@@ -29,7 +29,7 @@
%% Test cases must be exported.
-export([base64_encode/1, base64_decode/1, base64_otp_5635/1,
base64_otp_6279/1, big/1, illegal/1, mime_decode/1,
- roundtrip/1]).
+ mime_decode_to_string/1, roundtrip/1]).
init_per_testcase(_, Config) ->
Dog = test_server:timetrap(?t:minutes(2)),
@@ -50,7 +50,7 @@ all(doc) ->
all(suite) ->
[base64_encode, base64_decode, base64_otp_5635,
base64_otp_6279, big, illegal, mime_decode,
- roundtrip].
+ mime_decode_to_string, roundtrip].
%%-------------------------------------------------------------------------
@@ -59,7 +59,7 @@ base64_encode(doc) ->
base64_encode(suite) ->
[];
base64_encode(Config) when is_list(Config) ->
- %% Two pads
+ %% Two pads
<<"QWxhZGRpbjpvcGVuIHNlc2FtZQ==">> =
base64:encode("Aladdin:open sesame"),
%% One pad
@@ -77,8 +77,8 @@ base64_decode(doc) ->
base64_decode(suite) ->
[];
base64_decode(Config) when is_list(Config) ->
- %% Two pads
- <<"Aladdin:open sesame">> =
+ %% Two pads
+ <<"Aladdin:open sesame">> =
base64:decode("QWxhZGRpbjpvcGVuIHNlc2FtZQ=="),
%% One pad
<<"Hello World">> = base64:decode(<<"SGVsbG8gV29ybGQ=">>),
@@ -138,20 +138,85 @@ illegal(Config) when is_list(Config) ->
{'EXIT',{function_clause, _}} = (catch base64:decode("()")),
ok.
%%-------------------------------------------------------------------------
+%% mime_decode and mime_decode_to_string have different implementations
+%% so test both with the same input separately. Both functions have
+%% the same implementation for binary/string arguments.
mime_decode(doc) ->
["Test base64:mime_decode/1."];
mime_decode(suite) ->
[];
mime_decode(Config) when is_list(Config) ->
- %% Two pads
- <<"Aladdin:open sesame">> =
+ %% Test correct padding
+ <<"one">> = base64:mime_decode(<<"b25l">>),
+ <<"on">> = base64:mime_decode(<<"b24=">>),
+ <<"o">> = base64:mime_decode(<<"bw==">>),
+ %% Test 1 extra padding
+ <<"one">> = base64:mime_decode(<<"b25l= =">>),
+ <<"on">> = base64:mime_decode(<<"b24== =">>),
+ <<"o">> = base64:mime_decode(<<"bw=== =">>),
+ %% Test 2 extra padding
+ <<"one">> = base64:mime_decode(<<"b25l===">>),
+ <<"on">> = base64:mime_decode(<<"b24====">>),
+ <<"o">> = base64:mime_decode(<<"bw=====">>),
+ %% Test misc embedded padding
+ <<"one">> = base64:mime_decode(<<"b2=5l===">>),
+ <<"on">> = base64:mime_decode(<<"b=24====">>),
+ <<"o">> = base64:mime_decode(<<"b=w=====">>),
+ %% Test misc white space and illegals with embedded padding
+ <<"one">> = base64:mime_decode(<<" b~2=\r\n5()l===">>),
+ <<"on">> = base64:mime_decode(<<"\tb =2\"�4=�= ==">>),
+ <<"o">> = base64:mime_decode(<<"\nb=w=====">>),
+ %% Two pads
+ <<"Aladdin:open sesame">> =
base64:mime_decode("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="),
- %% One pad, followed by ignored text
- <<"Hello World">> = base64:mime_decode(<<"SGVsb)(G8gV29ybGQ=apa">>),
+ %% One pad to ignore, followed by more text
+ <<"Hello World!!">> = base64:mime_decode(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>),
+ %% No pad
+ <<"Aladdin:open sesam">> =
+ base64:mime_decode("QWxhZGRpbjpvcG�\")(VuIHNlc2Ft"),
+ %% Encoded base 64 strings may be divided by non base 64 chars.
+ %% In this cases whitespaces.
+ <<"0123456789!@#0^&*();:<>,. []{}">> =
+ base64:mime_decode(
+ <<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>),
+ ok.
+
+%%-------------------------------------------------------------------------
+
+%% Repeat of mime_decode() tests
+mime_decode_to_string(doc) ->
+ ["Test base64:mime_decode_to_string/1."];
+mime_decode_to_string(suite) ->
+ [];
+mime_decode_to_string(Config) when is_list(Config) ->
+ %% Test correct padding
+ "one" = base64:mime_decode_to_string(<<"b25l">>),
+ "on" = base64:mime_decode_to_string(<<"b24=">>),
+ "o" = base64:mime_decode_to_string(<<"bw==">>),
+ %% Test 1 extra padding
+ "one" = base64:mime_decode_to_string(<<"b25l= =">>),
+ "on" = base64:mime_decode_to_string(<<"b24== =">>),
+ "o" = base64:mime_decode_to_string(<<"bw=== =">>),
+ %% Test 2 extra padding
+ "one" = base64:mime_decode_to_string(<<"b25l===">>),
+ "on" = base64:mime_decode_to_string(<<"b24====">>),
+ "o" = base64:mime_decode_to_string(<<"bw=====">>),
+ %% Test misc embedded padding
+ "one" = base64:mime_decode_to_string(<<"b2=5l===">>),
+ "on" = base64:mime_decode_to_string(<<"b=24====">>),
+ "o" = base64:mime_decode_to_string(<<"b=w=====">>),
+ %% Test misc white space and illegals with embedded padding
+ "one" = base64:mime_decode_to_string(<<" b~2=\r\n5()l===">>),
+ "on" = base64:mime_decode_to_string(<<"\tb =2\"�4=�= ==">>),
+ "o" = base64:mime_decode_to_string(<<"\nb=w=====">>),
+ %% Two pads
+ "Aladdin:open sesame" =
+ base64:mime_decode_to_string("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="),
+ %% One pad to ignore, followed by more text
+ "Hello World!!" = base64:mime_decode_to_string(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>),
%% No pad
"Aladdin:open sesam" =
base64:mime_decode_to_string("QWxhZGRpbjpvcG�\")(VuIHNlc2Ft"),
-
%% Encoded base 64 strings may be divided by non base 64 chars.
%% In this cases whitespaces.
"0123456789!@#0^&*();:<>,. []{}" =
@@ -159,6 +224,7 @@ mime_decode(Config) when is_list(Config) ->
<<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>),
ok.
+%%-------------------------------------------------------------------------
roundtrip(Config) when is_list(Config) ->
Sizes = lists:seq(1, 255) ++ lists:seq(2400-5, 2440),