diff options
author | Ingela Anderton Andin <[email protected]> | 2011-01-11 10:11:23 +0100 |
---|---|---|
committer | Ingela Anderton Andin <[email protected]> | 2011-01-11 10:11:43 +0100 |
commit | ab1c28eadc968d8475ce42a97b2ae143590d5cc1 (patch) | |
tree | 21819ccca682ecb1ca45361240a0ef182618c5d7 /lib/stdlib/src | |
parent | 095d5ebc0f739982b55b95fdd9919a6331b4b2b1 (diff) | |
parent | 105fc6e844319a34df72efe6b2eb4823626a7957 (diff) | |
download | otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.tar.gz otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.tar.bz2 otp-ab1c28eadc968d8475ce42a97b2ae143590d5cc1.zip |
Merge branch 'td/base64-mime-decoding' into dev
* td/base64-mime-decoding:
Improve pad character handling in base64 MIME decoding functions
OTP-9020
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r-- | lib/stdlib/src/base64.erl | 111 |
1 files changed, 79 insertions, 32 deletions
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl index ebef998ee1..69d864051c 100644 --- a/lib/stdlib/src/base64.erl +++ b/lib/stdlib/src/base64.erl @@ -114,7 +114,7 @@ decode(List) when is_list(List) -> mime_decode(Bin) when is_binary(Bin) -> mime_decode_binary(<<>>, Bin); mime_decode(List) when is_list(List) -> - list_to_binary(mime_decode_l(List)). + mime_decode(list_to_binary(List)). -spec decode_l(string()) -> string(). @@ -125,7 +125,7 @@ decode_l(List) -> -spec mime_decode_l(string()) -> string(). mime_decode_l(List) -> - L = strip_illegal(List, []), + L = strip_illegal(List, [], 0), decode(L, []). %%------------------------------------------------------------------------- @@ -198,6 +198,9 @@ decode_binary(Result, <<>>) -> true = is_binary(Result), Result. +%% Skipping pad character if not at end of string. Also liberal about +%% excess padding and skipping of other illegal (non-base64 alphabet) +%% characters. See section 3.3 of RFC4648 mime_decode_binary(Result, <<0:8,T/bits>>) -> mime_decode_binary(Result, T); mime_decode_binary(Result0, <<C:8,T/bits>>) -> @@ -205,15 +208,27 @@ mime_decode_binary(Result0, <<C:8,T/bits>>) -> Bits when is_integer(Bits) -> mime_decode_binary(<<Result0/bits,Bits:6>>, T); eq -> - case tail_contains_equal(T) of - true -> - Split = byte_size(Result0) - 1, - <<Result:Split/bytes,_:4>> = Result0, - Result; - false -> - Split = byte_size(Result0) - 1, - <<Result:Split/bytes,_:2>> = Result0, - Result + case tail_contains_more(T, false) of + {<<>>, Eq} -> + %% No more valid data. + case bit_size(Result0) rem 8 of + 0 -> + %% '====' is not uncommon. + Result0; + 4 when Eq -> + %% enforce at least one more '=' only ignoring illegals and spacing + Split = size(Result0), + <<Result:Split/bytes,_:4>> = Result0, + Result; + 2 -> + %% remove 2 bits + Split = size(Result0), + <<Result:Split/bytes,_:2>> = Result0, + Result + end; + {More, _} -> + %% More valid data, skip the eq as invalid + mime_decode_binary(Result0, More) end; _ -> mime_decode_binary(Result0, T) @@ -262,31 +277,63 @@ strip_ws(<<$\s,T/binary>>) -> strip_ws(T); strip_ws(T) -> T. -strip_illegal([0|Cs], A) -> - strip_illegal(Cs, A); -strip_illegal([C|Cs], A) -> +%% Skipping pad character if not at end of string. Also liberal about +%% excess padding and skipping of other illegal (non-base64 alphabet) +%% characters. See section 3.3 of RFC4648 +strip_illegal([], A, _Cnt) -> + A; +strip_illegal([0|Cs], A, Cnt) -> + strip_illegal(Cs, A, Cnt); +strip_illegal([C|Cs], A, Cnt) -> case element(C, ?DECODE_MAP) of - bad -> strip_illegal(Cs, A); - ws -> strip_illegal(Cs, A); - eq -> strip_illegal_end(Cs, [$=|A]); - _ -> strip_illegal(Cs, [C|A]) - end; -strip_illegal([], A) -> A. + bad -> + strip_illegal(Cs, A, Cnt); + ws -> + strip_illegal(Cs, A, Cnt); + eq -> + case {tail_contains_more(Cs, false), Cnt rem 4} of + {{[], _}, 0} -> + A; %% Ignore extra = + {{[], true}, 2} -> + [$=|[$=|A]]; %% 'XX==' + {{[], _}, 3} -> + [$=|A]; %% 'XXX=' + {{[H|T], _}, _} -> + %% more data, skip equals + strip_illegal(T, [H|A], Cnt+1) + end; + _ -> + strip_illegal(Cs, [C|A], Cnt+1) + end. -strip_illegal_end([0|Cs], A) -> - strip_illegal_end(Cs, A); -strip_illegal_end([C|Cs], A) -> +%% Search the tail for more valid data and remember if we saw +%% another equals along the way. +tail_contains_more([], Eq) -> + {[], Eq}; +tail_contains_more(<<>>, Eq) -> + {<<>>, Eq}; +tail_contains_more([C|T]=More, Eq) -> case element(C, ?DECODE_MAP) of - bad -> strip_illegal(Cs, A); - ws -> strip_illegal(Cs, A); - eq -> [C|A]; - _ -> strip_illegal(Cs, [C|A]) + bad -> + tail_contains_more(T, Eq); + ws -> + tail_contains_more(T, Eq); + eq -> + tail_contains_more(T, true); + _ -> + {More, Eq} end; -strip_illegal_end([], A) -> A. - -tail_contains_equal(<<$=,_/binary>>) -> true; -tail_contains_equal(<<_,T/binary>>) -> tail_contains_equal(T); -tail_contains_equal(<<>>) -> false. +tail_contains_more(<<C:8,T/bits>> =More, Eq) -> + case element(C, ?DECODE_MAP) of + bad -> + tail_contains_more(T, Eq); + ws -> + tail_contains_more(T, Eq); + eq -> + tail_contains_more(T, true); + _ -> + {More, Eq} + end. %% accessors b64e(X) -> |