aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src
diff options
context:
space:
mode:
authorIngela Anderton Andin <[email protected]>2011-01-11 10:11:23 +0100
committerIngela Anderton Andin <[email protected]>2011-01-11 10:11:43 +0100
commitab1c28eadc968d8475ce42a97b2ae143590d5cc1 (patch)
tree21819ccca682ecb1ca45361240a0ef182618c5d7 /lib/stdlib/src
parent095d5ebc0f739982b55b95fdd9919a6331b4b2b1 (diff)
parent105fc6e844319a34df72efe6b2eb4823626a7957 (diff)
downloadotp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.tar.gz
otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.tar.bz2
otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.zip
Merge branch 'td/base64-mime-decoding' into dev
* td/base64-mime-decoding: Improve pad character handling in base64 MIME decoding functions OTP-9020
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r--lib/stdlib/src/base64.erl111
1 files changed, 79 insertions, 32 deletions
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl
index ebef998ee1..69d864051c 100644
--- a/lib/stdlib/src/base64.erl
+++ b/lib/stdlib/src/base64.erl
@@ -114,7 +114,7 @@ decode(List) when is_list(List) ->
mime_decode(Bin) when is_binary(Bin) ->
mime_decode_binary(<<>>, Bin);
mime_decode(List) when is_list(List) ->
- list_to_binary(mime_decode_l(List)).
+ mime_decode(list_to_binary(List)).
-spec decode_l(string()) -> string().
@@ -125,7 +125,7 @@ decode_l(List) ->
-spec mime_decode_l(string()) -> string().
mime_decode_l(List) ->
- L = strip_illegal(List, []),
+ L = strip_illegal(List, [], 0),
decode(L, []).
%%-------------------------------------------------------------------------
@@ -198,6 +198,9 @@ decode_binary(Result, <<>>) ->
true = is_binary(Result),
Result.
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
mime_decode_binary(Result, <<0:8,T/bits>>) ->
mime_decode_binary(Result, T);
mime_decode_binary(Result0, <<C:8,T/bits>>) ->
@@ -205,15 +208,27 @@ mime_decode_binary(Result0, <<C:8,T/bits>>) ->
Bits when is_integer(Bits) ->
mime_decode_binary(<<Result0/bits,Bits:6>>, T);
eq ->
- case tail_contains_equal(T) of
- true ->
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:4>> = Result0,
- Result;
- false ->
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:2>> = Result0,
- Result
+ case tail_contains_more(T, false) of
+ {<<>>, Eq} ->
+ %% No more valid data.
+ case bit_size(Result0) rem 8 of
+ 0 ->
+ %% '====' is not uncommon.
+ Result0;
+ 4 when Eq ->
+ %% enforce at least one more '=' only ignoring illegals and spacing
+ Split = size(Result0),
+ <<Result:Split/bytes,_:4>> = Result0,
+ Result;
+ 2 ->
+ %% remove 2 bits
+ Split = size(Result0),
+ <<Result:Split/bytes,_:2>> = Result0,
+ Result
+ end;
+ {More, _} ->
+ %% More valid data, skip the eq as invalid
+ mime_decode_binary(Result0, More)
end;
_ ->
mime_decode_binary(Result0, T)
@@ -262,31 +277,63 @@ strip_ws(<<$\s,T/binary>>) ->
strip_ws(T);
strip_ws(T) -> T.
-strip_illegal([0|Cs], A) ->
- strip_illegal(Cs, A);
-strip_illegal([C|Cs], A) ->
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
+strip_illegal([], A, _Cnt) ->
+ A;
+strip_illegal([0|Cs], A, Cnt) ->
+ strip_illegal(Cs, A, Cnt);
+strip_illegal([C|Cs], A, Cnt) ->
case element(C, ?DECODE_MAP) of
- bad -> strip_illegal(Cs, A);
- ws -> strip_illegal(Cs, A);
- eq -> strip_illegal_end(Cs, [$=|A]);
- _ -> strip_illegal(Cs, [C|A])
- end;
-strip_illegal([], A) -> A.
+ bad ->
+ strip_illegal(Cs, A, Cnt);
+ ws ->
+ strip_illegal(Cs, A, Cnt);
+ eq ->
+ case {tail_contains_more(Cs, false), Cnt rem 4} of
+ {{[], _}, 0} ->
+ A; %% Ignore extra =
+ {{[], true}, 2} ->
+ [$=|[$=|A]]; %% 'XX=='
+ {{[], _}, 3} ->
+ [$=|A]; %% 'XXX='
+ {{[H|T], _}, _} ->
+ %% more data, skip equals
+ strip_illegal(T, [H|A], Cnt+1)
+ end;
+ _ ->
+ strip_illegal(Cs, [C|A], Cnt+1)
+ end.
-strip_illegal_end([0|Cs], A) ->
- strip_illegal_end(Cs, A);
-strip_illegal_end([C|Cs], A) ->
+%% Search the tail for more valid data and remember if we saw
+%% another equals along the way.
+tail_contains_more([], Eq) ->
+ {[], Eq};
+tail_contains_more(<<>>, Eq) ->
+ {<<>>, Eq};
+tail_contains_more([C|T]=More, Eq) ->
case element(C, ?DECODE_MAP) of
- bad -> strip_illegal(Cs, A);
- ws -> strip_illegal(Cs, A);
- eq -> [C|A];
- _ -> strip_illegal(Cs, [C|A])
+ bad ->
+ tail_contains_more(T, Eq);
+ ws ->
+ tail_contains_more(T, Eq);
+ eq ->
+ tail_contains_more(T, true);
+ _ ->
+ {More, Eq}
end;
-strip_illegal_end([], A) -> A.
-
-tail_contains_equal(<<$=,_/binary>>) -> true;
-tail_contains_equal(<<_,T/binary>>) -> tail_contains_equal(T);
-tail_contains_equal(<<>>) -> false.
+tail_contains_more(<<C:8,T/bits>> =More, Eq) ->
+ case element(C, ?DECODE_MAP) of
+ bad ->
+ tail_contains_more(T, Eq);
+ ws ->
+ tail_contains_more(T, Eq);
+ eq ->
+ tail_contains_more(T, true);
+ _ ->
+ {More, Eq}
+ end.
%% accessors
b64e(X) ->