Merge branch 'td/base64-mime-decoding' into dev

* td/base64-mime-decoding: Improve pad character handling in base64 MIME decoding functions OTP-9020
author: Ingela Anderton Andin <[email protected]> 2011-01-11 10:11:23 +0100
committer: Ingela Anderton Andin <[email protected]> 2011-01-11 10:11:43 +0100
commit: ab1c28eadc968d8475ce42a97b2ae143590d5cc1 (patch)
tree: 21819ccca682ecb1ca45361240a0ef182618c5d7 /lib/stdlib/src
parent: 095d5ebc0f739982b55b95fdd9919a6331b4b2b1 (diff)
parent: 105fc6e844319a34df72efe6b2eb4823626a7957 (diff)
download: otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.tar.gz
otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.tar.bz2
otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.zip
1 files changed, 79 insertions, 32 deletions
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl
index ebef998ee1..69d864051c 100644
--- a/lib/stdlib/src/base64.erl
+++ b/lib/stdlib/src/base64.erl
@@ -114,7 +114,7 @@ decode(List) when is_list(List) ->
 mime_decode(Bin) when is_binary(Bin) ->
     mime_decode_binary(<<>>, Bin);
 mime_decode(List) when is_list(List) ->
-    list_to_binary(mime_decode_l(List)).
+    mime_decode(list_to_binary(List)).
 
 -spec decode_l(string()) -> string().
 
@@ -125,7 +125,7 @@ decode_l(List) ->
 -spec mime_decode_l(string()) -> string().
 
 mime_decode_l(List) ->
-    L = strip_illegal(List, []),
+    L = strip_illegal(List, [], 0),
     decode(L, []).
 
 %%-------------------------------------------------------------------------
@@ -198,6 +198,9 @@ decode_binary(Result, <<>>) ->
     true = is_binary(Result),
     Result.
 
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
 mime_decode_binary(Result, <<0:8,T/bits>>) ->
     mime_decode_binary(Result, T);
 mime_decode_binary(Result0, <<C:8,T/bits>>) ->
@@ -205,15 +208,27 @@ mime_decode_binary(Result0, <<C:8,T/bits>>) ->
 	Bits when is_integer(Bits) ->
 	    mime_decode_binary(<<Result0/bits,Bits:6>>, T);
 	eq ->
-	    case tail_contains_equal(T) of
-		true ->
-		    Split = byte_size(Result0) - 1,
-		    <<Result:Split/bytes,_:4>> = Result0,
-		    Result;
-		false ->
-		    Split = byte_size(Result0) - 1,
-		    <<Result:Split/bytes,_:2>> = Result0,
-		    Result
+	    case tail_contains_more(T, false) of
+		{<<>>, Eq} ->
+		    %% No more valid data.
+		    case bit_size(Result0) rem 8 of
+			0 ->
+			    %% '====' is not uncommon.
+			    Result0;
+			4 when Eq ->
+			    %% enforce at least one more '=' only ignoring illegals and spacing
+			    Split = size(Result0),
+			    <<Result:Split/bytes,_:4>> = Result0,
+			    Result;
+			2 ->
+			    %% remove 2 bits
+			    Split = size(Result0),
+			    <<Result:Split/bytes,_:2>> = Result0,
+			    Result
+		    end;
+		{More, _} ->
+		    %% More valid data, skip the eq as invalid
+		    mime_decode_binary(Result0, More)
 	    end;
 	_ ->
 	    mime_decode_binary(Result0, T)
@@ -262,31 +277,63 @@ strip_ws(<<$\s,T/binary>>) ->
     strip_ws(T);
 strip_ws(T) -> T.
 
-strip_illegal([0|Cs], A) ->
-    strip_illegal(Cs, A);
-strip_illegal([C|Cs], A) ->
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
+strip_illegal([], A, _Cnt) ->
+    A;
+strip_illegal([0|Cs], A, Cnt) ->
+    strip_illegal(Cs, A, Cnt);
+strip_illegal([C|Cs], A, Cnt) ->
     case element(C, ?DECODE_MAP) of
-	bad -> strip_illegal(Cs, A);
-	ws -> strip_illegal(Cs, A);
-	eq -> strip_illegal_end(Cs, [$=|A]);
-	_ -> strip_illegal(Cs, [C|A])
-    end;
-strip_illegal([], A) -> A.
+	bad ->
+	    strip_illegal(Cs, A, Cnt);
+	ws ->
+	    strip_illegal(Cs, A, Cnt);
+	eq ->
+	    case {tail_contains_more(Cs, false), Cnt rem 4} of
+		{{[], _}, 0} ->
+		    A;            %% Ignore extra =
+		{{[], true}, 2} ->
+		    [$=|[$=|A]];  %% 'XX=='
+		{{[], _}, 3} ->
+		    [$=|A];       %% 'XXX='
+		{{[H|T], _}, _} ->
+		    %% more data, skip equals
+		    strip_illegal(T, [H|A], Cnt+1)
+	    end;
+	_ ->
+	    strip_illegal(Cs, [C|A], Cnt+1)
+    end.
 
-strip_illegal_end([0|Cs], A) ->
-    strip_illegal_end(Cs, A);
-strip_illegal_end([C|Cs], A) ->
+%% Search the tail for more valid data and remember if we saw
+%% another equals along the way.
+tail_contains_more([], Eq) ->
+    {[], Eq};
+tail_contains_more(<<>>, Eq) ->
+    {<<>>, Eq};
+tail_contains_more([C|T]=More, Eq) ->
     case element(C, ?DECODE_MAP) of
-	bad -> strip_illegal(Cs, A);
-	ws -> strip_illegal(Cs, A);
-	eq -> [C|A];
-	_ -> strip_illegal(Cs, [C|A])
+	bad ->
+	    tail_contains_more(T, Eq);
+	ws ->
+	    tail_contains_more(T, Eq);
+	eq ->
+	    tail_contains_more(T, true);
+	_ ->
+	    {More, Eq}
     end;
-strip_illegal_end([], A) -> A.
-
-tail_contains_equal(<<$=,_/binary>>) -> true;
-tail_contains_equal(<<_,T/binary>>) -> tail_contains_equal(T);
-tail_contains_equal(<<>>) -> false.
+tail_contains_more(<<C:8,T/bits>> =More, Eq) ->
+    case element(C, ?DECODE_MAP) of
+	bad ->
+	    tail_contains_more(T, Eq);
+	ws ->
+	    tail_contains_more(T, Eq);
+	eq ->
+	    tail_contains_more(T, true);
+	_ ->
+	    {More, Eq}
+    end.
     
 %% accessors 
 b64e(X) ->
author	Ingela Anderton Andin <[email protected]>	2011-01-11 10:11:23 +0100
committer	Ingela Anderton Andin <[email protected]>	2011-01-11 10:11:43 +0100
commit	ab1c28eadc968d8475ce42a97b2ae143590d5cc1 (patch)
tree	21819ccca682ecb1ca45361240a0ef182618c5d7 /lib/stdlib/src
parent	095d5ebc0f739982b55b95fdd9919a6331b4b2b1 (diff)
parent	105fc6e844319a34df72efe6b2eb4823626a7957 (diff)
download	otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.tar.gz otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.tar.bz2 otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.zip