diff options
author | Thomas O'Dowd <[email protected]> | 2010-12-07 10:58:17 +0900 |
---|---|---|
committer | Thomas O'Dowd <[email protected]> | 2010-12-07 10:58:17 +0900 |
commit | 105fc6e844319a34df72efe6b2eb4823626a7957 (patch) | |
tree | 122f4abb07b74c9e419acf591a44581fc543d739 /lib/stdlib/src | |
parent | 74ec1253e1a89f0bb266831f73716a24c0798dc1 (diff) | |
download | otp-105fc6e844319a34df72efe6b2eb4823626a7957.tar.gz otp-105fc6e844319a34df72efe6b2eb4823626a7957.tar.bz2 otp-105fc6e844319a34df72efe6b2eb4823626a7957.zip |
Improve pad character handling in base64 MIME decoding functions
Implement the 'MAY' clauses from RFC4648 regarding the pad character
to make mime_decode() and mime_decode_to_string() functions more
tolerant of badly padded base64. The RFC is quoted below for easy
reference.
RFC4648 Section 3.3 with reference to MIME decoding:
Furthermore, such specifications MAY ignore the pad character, "=",
treating it as non-alphabet data, if it is present before the end of
the encoded data. If more than the allowed number of pad characters
is found at the end of the string (e.g., a base 64 string terminated
with "==="), the excess pad characters MAY also be ignored.
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r-- | lib/stdlib/src/base64.erl | 111 |
1 files changed, 79 insertions, 32 deletions
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl index ebef998ee1..69d864051c 100644 --- a/lib/stdlib/src/base64.erl +++ b/lib/stdlib/src/base64.erl @@ -114,7 +114,7 @@ decode(List) when is_list(List) -> mime_decode(Bin) when is_binary(Bin) -> mime_decode_binary(<<>>, Bin); mime_decode(List) when is_list(List) -> - list_to_binary(mime_decode_l(List)). + mime_decode(list_to_binary(List)). -spec decode_l(string()) -> string(). @@ -125,7 +125,7 @@ decode_l(List) -> -spec mime_decode_l(string()) -> string(). mime_decode_l(List) -> - L = strip_illegal(List, []), + L = strip_illegal(List, [], 0), decode(L, []). %%------------------------------------------------------------------------- @@ -198,6 +198,9 @@ decode_binary(Result, <<>>) -> true = is_binary(Result), Result. +%% Skipping pad character if not at end of string. Also liberal about +%% excess padding and skipping of other illegal (non-base64 alphabet) +%% characters. See section 3.3 of RFC4648 mime_decode_binary(Result, <<0:8,T/bits>>) -> mime_decode_binary(Result, T); mime_decode_binary(Result0, <<C:8,T/bits>>) -> @@ -205,15 +208,27 @@ mime_decode_binary(Result0, <<C:8,T/bits>>) -> Bits when is_integer(Bits) -> mime_decode_binary(<<Result0/bits,Bits:6>>, T); eq -> - case tail_contains_equal(T) of - true -> - Split = byte_size(Result0) - 1, - <<Result:Split/bytes,_:4>> = Result0, - Result; - false -> - Split = byte_size(Result0) - 1, - <<Result:Split/bytes,_:2>> = Result0, - Result + case tail_contains_more(T, false) of + {<<>>, Eq} -> + %% No more valid data. + case bit_size(Result0) rem 8 of + 0 -> + %% '====' is not uncommon. + Result0; + 4 when Eq -> + %% enforce at least one more '=' only ignoring illegals and spacing + Split = size(Result0), + <<Result:Split/bytes,_:4>> = Result0, + Result; + 2 -> + %% remove 2 bits + Split = size(Result0), + <<Result:Split/bytes,_:2>> = Result0, + Result + end; + {More, _} -> + %% More valid data, skip the eq as invalid + mime_decode_binary(Result0, More) end; _ -> mime_decode_binary(Result0, T) @@ -262,31 +277,63 @@ strip_ws(<<$\s,T/binary>>) -> strip_ws(T); strip_ws(T) -> T. -strip_illegal([0|Cs], A) -> - strip_illegal(Cs, A); -strip_illegal([C|Cs], A) -> +%% Skipping pad character if not at end of string. Also liberal about +%% excess padding and skipping of other illegal (non-base64 alphabet) +%% characters. See section 3.3 of RFC4648 +strip_illegal([], A, _Cnt) -> + A; +strip_illegal([0|Cs], A, Cnt) -> + strip_illegal(Cs, A, Cnt); +strip_illegal([C|Cs], A, Cnt) -> case element(C, ?DECODE_MAP) of - bad -> strip_illegal(Cs, A); - ws -> strip_illegal(Cs, A); - eq -> strip_illegal_end(Cs, [$=|A]); - _ -> strip_illegal(Cs, [C|A]) - end; -strip_illegal([], A) -> A. + bad -> + strip_illegal(Cs, A, Cnt); + ws -> + strip_illegal(Cs, A, Cnt); + eq -> + case {tail_contains_more(Cs, false), Cnt rem 4} of + {{[], _}, 0} -> + A; %% Ignore extra = + {{[], true}, 2} -> + [$=|[$=|A]]; %% 'XX==' + {{[], _}, 3} -> + [$=|A]; %% 'XXX=' + {{[H|T], _}, _} -> + %% more data, skip equals + strip_illegal(T, [H|A], Cnt+1) + end; + _ -> + strip_illegal(Cs, [C|A], Cnt+1) + end. -strip_illegal_end([0|Cs], A) -> - strip_illegal_end(Cs, A); -strip_illegal_end([C|Cs], A) -> +%% Search the tail for more valid data and remember if we saw +%% another equals along the way. +tail_contains_more([], Eq) -> + {[], Eq}; +tail_contains_more(<<>>, Eq) -> + {<<>>, Eq}; +tail_contains_more([C|T]=More, Eq) -> case element(C, ?DECODE_MAP) of - bad -> strip_illegal(Cs, A); - ws -> strip_illegal(Cs, A); - eq -> [C|A]; - _ -> strip_illegal(Cs, [C|A]) + bad -> + tail_contains_more(T, Eq); + ws -> + tail_contains_more(T, Eq); + eq -> + tail_contains_more(T, true); + _ -> + {More, Eq} end; -strip_illegal_end([], A) -> A. - -tail_contains_equal(<<$=,_/binary>>) -> true; -tail_contains_equal(<<_,T/binary>>) -> tail_contains_equal(T); -tail_contains_equal(<<>>) -> false. +tail_contains_more(<<C:8,T/bits>> =More, Eq) -> + case element(C, ?DECODE_MAP) of + bad -> + tail_contains_more(T, Eq); + ws -> + tail_contains_more(T, Eq); + eq -> + tail_contains_more(T, true); + _ -> + {More, Eq} + end. %% accessors b64e(X) -> |