Improve pad character handling in base64 MIME decoding functions

Implement the 'MAY' clauses from RFC4648 regarding the pad character to make mime_decode() and mime_decode_to_string() functions more tolerant of badly padded base64. The RFC is quoted below for easy reference. RFC4648 Section 3.3 with reference to MIME decoding: Furthermore, such specifications MAY ignore the pad character, "=", treating it as non-alphabet data, if it is present before the end of the encoded data. If more than the allowed number of pad characters is found at the end of the string (e.g., a base 64 string terminated with "==="), the excess pad characters MAY also be ignored.
author: Thomas O'Dowd <[email protected]> 2010-12-07 10:58:17 +0900
committer: Thomas O'Dowd <[email protected]> 2010-12-07 10:58:17 +0900
commit: 105fc6e844319a34df72efe6b2eb4823626a7957 (patch)
tree: 122f4abb07b74c9e419acf591a44581fc543d739 /lib/stdlib/src
parent: 74ec1253e1a89f0bb266831f73716a24c0798dc1 (diff)
download: otp-105fc6e844319a34df72efe6b2eb4823626a7957.tar.gz
otp-105fc6e844319a34df72efe6b2eb4823626a7957.tar.bz2
otp-105fc6e844319a34df72efe6b2eb4823626a7957.zip
1 files changed, 79 insertions, 32 deletions
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl
index ebef998ee1..69d864051c 100644
--- a/lib/stdlib/src/base64.erl
+++ b/lib/stdlib/src/base64.erl
@@ -114,7 +114,7 @@ decode(List) when is_list(List) ->
 mime_decode(Bin) when is_binary(Bin) ->
     mime_decode_binary(<<>>, Bin);
 mime_decode(List) when is_list(List) ->
-    list_to_binary(mime_decode_l(List)).
+    mime_decode(list_to_binary(List)).
 
 -spec decode_l(string()) -> string().
 
@@ -125,7 +125,7 @@ decode_l(List) ->
 -spec mime_decode_l(string()) -> string().
 
 mime_decode_l(List) ->
-    L = strip_illegal(List, []),
+    L = strip_illegal(List, [], 0),
     decode(L, []).
 
 %%-------------------------------------------------------------------------
@@ -198,6 +198,9 @@ decode_binary(Result, <<>>) ->
     true = is_binary(Result),
     Result.
 
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
 mime_decode_binary(Result, <<0:8,T/bits>>) ->
     mime_decode_binary(Result, T);
 mime_decode_binary(Result0, <<C:8,T/bits>>) ->
@@ -205,15 +208,27 @@ mime_decode_binary(Result0, <<C:8,T/bits>>) ->
 	Bits when is_integer(Bits) ->
 	    mime_decode_binary(<<Result0/bits,Bits:6>>, T);
 	eq ->
-	    case tail_contains_equal(T) of
-		true ->
-		    Split = byte_size(Result0) - 1,
-		    <<Result:Split/bytes,_:4>> = Result0,
-		    Result;
-		false ->
-		    Split = byte_size(Result0) - 1,
-		    <<Result:Split/bytes,_:2>> = Result0,
-		    Result
+	    case tail_contains_more(T, false) of
+		{<<>>, Eq} ->
+		    %% No more valid data.
+		    case bit_size(Result0) rem 8 of
+			0 ->
+			    %% '====' is not uncommon.
+			    Result0;
+			4 when Eq ->
+			    %% enforce at least one more '=' only ignoring illegals and spacing
+			    Split = size(Result0),
+			    <<Result:Split/bytes,_:4>> = Result0,
+			    Result;
+			2 ->
+			    %% remove 2 bits
+			    Split = size(Result0),
+			    <<Result:Split/bytes,_:2>> = Result0,
+			    Result
+		    end;
+		{More, _} ->
+		    %% More valid data, skip the eq as invalid
+		    mime_decode_binary(Result0, More)
 	    end;
 	_ ->
 	    mime_decode_binary(Result0, T)
@@ -262,31 +277,63 @@ strip_ws(<<$\s,T/binary>>) ->
     strip_ws(T);
 strip_ws(T) -> T.
 
-strip_illegal([0|Cs], A) ->
-    strip_illegal(Cs, A);
-strip_illegal([C|Cs], A) ->
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
+strip_illegal([], A, _Cnt) ->
+    A;
+strip_illegal([0|Cs], A, Cnt) ->
+    strip_illegal(Cs, A, Cnt);
+strip_illegal([C|Cs], A, Cnt) ->
     case element(C, ?DECODE_MAP) of
-	bad -> strip_illegal(Cs, A);
-	ws -> strip_illegal(Cs, A);
-	eq -> strip_illegal_end(Cs, [$=|A]);
-	_ -> strip_illegal(Cs, [C|A])
-    end;
-strip_illegal([], A) -> A.
+	bad ->
+	    strip_illegal(Cs, A, Cnt);
+	ws ->
+	    strip_illegal(Cs, A, Cnt);
+	eq ->
+	    case {tail_contains_more(Cs, false), Cnt rem 4} of
+		{{[], _}, 0} ->
+		    A;            %% Ignore extra =
+		{{[], true}, 2} ->
+		    [$=|[$=|A]];  %% 'XX=='
+		{{[], _}, 3} ->
+		    [$=|A];       %% 'XXX='
+		{{[H|T], _}, _} ->
+		    %% more data, skip equals
+		    strip_illegal(T, [H|A], Cnt+1)
+	    end;
+	_ ->
+	    strip_illegal(Cs, [C|A], Cnt+1)
+    end.
 
-strip_illegal_end([0|Cs], A) ->
-    strip_illegal_end(Cs, A);
-strip_illegal_end([C|Cs], A) ->
+%% Search the tail for more valid data and remember if we saw
+%% another equals along the way.
+tail_contains_more([], Eq) ->
+    {[], Eq};
+tail_contains_more(<<>>, Eq) ->
+    {<<>>, Eq};
+tail_contains_more([C|T]=More, Eq) ->
     case element(C, ?DECODE_MAP) of
-	bad -> strip_illegal(Cs, A);
-	ws -> strip_illegal(Cs, A);
-	eq -> [C|A];
-	_ -> strip_illegal(Cs, [C|A])
+	bad ->
+	    tail_contains_more(T, Eq);
+	ws ->
+	    tail_contains_more(T, Eq);
+	eq ->
+	    tail_contains_more(T, true);
+	_ ->
+	    {More, Eq}
     end;
-strip_illegal_end([], A) -> A.
-
-tail_contains_equal(<<$=,_/binary>>) -> true;
-tail_contains_equal(<<_,T/binary>>) -> tail_contains_equal(T);
-tail_contains_equal(<<>>) -> false.
+tail_contains_more(<<C:8,T/bits>> =More, Eq) ->
+    case element(C, ?DECODE_MAP) of
+	bad ->
+	    tail_contains_more(T, Eq);
+	ws ->
+	    tail_contains_more(T, Eq);
+	eq ->
+	    tail_contains_more(T, true);
+	_ ->
+	    {More, Eq}
+    end.
     
 %% accessors 
 b64e(X) ->
author	Thomas O'Dowd <[email protected]>	2010-12-07 10:58:17 +0900
committer	Thomas O'Dowd <[email protected]>	2010-12-07 10:58:17 +0900
commit	105fc6e844319a34df72efe6b2eb4823626a7957 (patch)
tree	122f4abb07b74c9e419acf591a44581fc543d739 /lib/stdlib/src
parent	74ec1253e1a89f0bb266831f73716a24c0798dc1 (diff)
download	otp-105fc6e844319a34df72efe6b2eb4823626a7957.tar.gz otp-105fc6e844319a34df72efe6b2eb4823626a7957.tar.bz2 otp-105fc6e844319a34df72efe6b2eb4823626a7957.zip