aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src/base64.erl
diff options
context:
space:
mode:
authorThomas O'Dowd <[email protected]>2010-12-07 10:58:17 +0900
committerThomas O'Dowd <[email protected]>2010-12-07 10:58:17 +0900
commit105fc6e844319a34df72efe6b2eb4823626a7957 (patch)
tree122f4abb07b74c9e419acf591a44581fc543d739 /lib/stdlib/src/base64.erl
parent74ec1253e1a89f0bb266831f73716a24c0798dc1 (diff)
downloadotp-105fc6e844319a34df72efe6b2eb4823626a7957.tar.gz
otp-105fc6e844319a34df72efe6b2eb4823626a7957.tar.bz2
otp-105fc6e844319a34df72efe6b2eb4823626a7957.zip
Improve pad character handling in base64 MIME decoding functions
Implement the 'MAY' clauses from RFC4648 regarding the pad character to make mime_decode() and mime_decode_to_string() functions more tolerant of badly padded base64. The RFC is quoted below for easy reference. RFC4648 Section 3.3 with reference to MIME decoding: Furthermore, such specifications MAY ignore the pad character, "=", treating it as non-alphabet data, if it is present before the end of the encoded data. If more than the allowed number of pad characters is found at the end of the string (e.g., a base 64 string terminated with "==="), the excess pad characters MAY also be ignored.
Diffstat (limited to 'lib/stdlib/src/base64.erl')
-rw-r--r--lib/stdlib/src/base64.erl111
1 files changed, 79 insertions, 32 deletions
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl
index ebef998ee1..69d864051c 100644
--- a/lib/stdlib/src/base64.erl
+++ b/lib/stdlib/src/base64.erl
@@ -114,7 +114,7 @@ decode(List) when is_list(List) ->
mime_decode(Bin) when is_binary(Bin) ->
mime_decode_binary(<<>>, Bin);
mime_decode(List) when is_list(List) ->
- list_to_binary(mime_decode_l(List)).
+ mime_decode(list_to_binary(List)).
-spec decode_l(string()) -> string().
@@ -125,7 +125,7 @@ decode_l(List) ->
-spec mime_decode_l(string()) -> string().
mime_decode_l(List) ->
- L = strip_illegal(List, []),
+ L = strip_illegal(List, [], 0),
decode(L, []).
%%-------------------------------------------------------------------------
@@ -198,6 +198,9 @@ decode_binary(Result, <<>>) ->
true = is_binary(Result),
Result.
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
mime_decode_binary(Result, <<0:8,T/bits>>) ->
mime_decode_binary(Result, T);
mime_decode_binary(Result0, <<C:8,T/bits>>) ->
@@ -205,15 +208,27 @@ mime_decode_binary(Result0, <<C:8,T/bits>>) ->
Bits when is_integer(Bits) ->
mime_decode_binary(<<Result0/bits,Bits:6>>, T);
eq ->
- case tail_contains_equal(T) of
- true ->
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:4>> = Result0,
- Result;
- false ->
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:2>> = Result0,
- Result
+ case tail_contains_more(T, false) of
+ {<<>>, Eq} ->
+ %% No more valid data.
+ case bit_size(Result0) rem 8 of
+ 0 ->
+ %% '====' is not uncommon.
+ Result0;
+ 4 when Eq ->
+ %% enforce at least one more '=' only ignoring illegals and spacing
+ Split = size(Result0),
+ <<Result:Split/bytes,_:4>> = Result0,
+ Result;
+ 2 ->
+ %% remove 2 bits
+ Split = size(Result0),
+ <<Result:Split/bytes,_:2>> = Result0,
+ Result
+ end;
+ {More, _} ->
+ %% More valid data, skip the eq as invalid
+ mime_decode_binary(Result0, More)
end;
_ ->
mime_decode_binary(Result0, T)
@@ -262,31 +277,63 @@ strip_ws(<<$\s,T/binary>>) ->
strip_ws(T);
strip_ws(T) -> T.
-strip_illegal([0|Cs], A) ->
- strip_illegal(Cs, A);
-strip_illegal([C|Cs], A) ->
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
+strip_illegal([], A, _Cnt) ->
+ A;
+strip_illegal([0|Cs], A, Cnt) ->
+ strip_illegal(Cs, A, Cnt);
+strip_illegal([C|Cs], A, Cnt) ->
case element(C, ?DECODE_MAP) of
- bad -> strip_illegal(Cs, A);
- ws -> strip_illegal(Cs, A);
- eq -> strip_illegal_end(Cs, [$=|A]);
- _ -> strip_illegal(Cs, [C|A])
- end;
-strip_illegal([], A) -> A.
+ bad ->
+ strip_illegal(Cs, A, Cnt);
+ ws ->
+ strip_illegal(Cs, A, Cnt);
+ eq ->
+ case {tail_contains_more(Cs, false), Cnt rem 4} of
+ {{[], _}, 0} ->
+ A; %% Ignore extra =
+ {{[], true}, 2} ->
+ [$=|[$=|A]]; %% 'XX=='
+ {{[], _}, 3} ->
+ [$=|A]; %% 'XXX='
+ {{[H|T], _}, _} ->
+ %% more data, skip equals
+ strip_illegal(T, [H|A], Cnt+1)
+ end;
+ _ ->
+ strip_illegal(Cs, [C|A], Cnt+1)
+ end.
-strip_illegal_end([0|Cs], A) ->
- strip_illegal_end(Cs, A);
-strip_illegal_end([C|Cs], A) ->
+%% Search the tail for more valid data and remember if we saw
+%% another equals along the way.
+tail_contains_more([], Eq) ->
+ {[], Eq};
+tail_contains_more(<<>>, Eq) ->
+ {<<>>, Eq};
+tail_contains_more([C|T]=More, Eq) ->
case element(C, ?DECODE_MAP) of
- bad -> strip_illegal(Cs, A);
- ws -> strip_illegal(Cs, A);
- eq -> [C|A];
- _ -> strip_illegal(Cs, [C|A])
+ bad ->
+ tail_contains_more(T, Eq);
+ ws ->
+ tail_contains_more(T, Eq);
+ eq ->
+ tail_contains_more(T, true);
+ _ ->
+ {More, Eq}
end;
-strip_illegal_end([], A) -> A.
-
-tail_contains_equal(<<$=,_/binary>>) -> true;
-tail_contains_equal(<<_,T/binary>>) -> tail_contains_equal(T);
-tail_contains_equal(<<>>) -> false.
+tail_contains_more(<<C:8,T/bits>> =More, Eq) ->
+ case element(C, ?DECODE_MAP) of
+ bad ->
+ tail_contains_more(T, Eq);
+ ws ->
+ tail_contains_more(T, Eq);
+ eq ->
+ tail_contains_more(T, true);
+ _ ->
+ {More, Eq}
+ end.
%% accessors
b64e(X) ->