aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src
diff options
context:
space:
mode:
authorHans Bolinder <[email protected]>2017-10-30 11:55:42 +0100
committerHans Bolinder <[email protected]>2017-11-28 13:03:46 +0100
commit3d28da78f3e8bab7a13f4f928cdd7747ab943197 (patch)
tree484c20082481420ecb97f7c204173d2a09ae1fe9 /lib/stdlib/src
parent779ea5fecf0e0ba525410a3dcb4ea84d12b5c77f (diff)
downloadotp-3d28da78f3e8bab7a13f4f928cdd7747ab943197.tar.gz
otp-3d28da78f3e8bab7a13f4f928cdd7747ab943197.tar.bz2
otp-3d28da78f3e8bab7a13f4f928cdd7747ab943197.zip
stdlib: Optimize base64 functions
A few test cases with zeroes are added. They were not handled correctly before. The access of DECODE_MAP is moved into the inlined function b64d, for symmetry. The function b64e is also inlined. The speed-up is small, but measurable. Note: encode(List), decode(List) and mime_decode(List) no longer call list_to_binary. This can break code that calls the functions with I/O-lists as input.
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r--lib/stdlib/src/base64.erl699
1 files changed, 433 insertions, 266 deletions
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl
index c8cf6fdffe..6311f3c86b 100644
--- a/lib/stdlib/src/base64.erl
+++ b/lib/stdlib/src/base64.erl
@@ -24,89 +24,95 @@
-export([encode/1, decode/1, mime_decode/1,
encode_to_string/1, decode_to_string/1, mime_decode_to_string/1]).
-%%-------------------------------------------------------------------------
%% The following type is a subtype of string() for return values
%% of (some) functions of this module.
-%%-------------------------------------------------------------------------
-
-type ascii_string() :: [1..255].
-type ascii_binary() :: binary().
-%%-------------------------------------------------------------------------
-%% encode_to_string(ASCII) -> Base64String
-%% ASCII - string() | binary()
-%% Base64String - string()
-%%
-%% Description: Encodes a plain ASCII string (or binary) into base64.
-%%-------------------------------------------------------------------------
-
-spec encode_to_string(Data) -> Base64String when
Data :: ascii_string() | ascii_binary(),
Base64String :: ascii_string().
encode_to_string(Bin) when is_binary(Bin) ->
- encode_to_string(binary_to_list(Bin));
+ encode_binary_to_string(Bin);
encode_to_string(List) when is_list(List) ->
- encode_l(List).
-
-%%-------------------------------------------------------------------------
-%% encode(ASCII) -> Base64
-%% ASCII - string() | binary()
-%% Base64 - binary()
-%%
-%% Description: Encodes a plain ASCII string (or binary) into base64.
-%%-------------------------------------------------------------------------
+ encode_list_to_string(List).
-spec encode(Data) -> Base64 when
Data :: ascii_string() | ascii_binary(),
Base64 :: ascii_binary().
encode(Bin) when is_binary(Bin) ->
- encode_binary(Bin);
+ encode_binary(Bin, <<>>);
encode(List) when is_list(List) ->
- list_to_binary(encode_l(List)).
+ encode_list(List, <<>>).
--spec encode_l(ascii_string()) -> ascii_string().
+encode_binary_to_string(<<>>) ->
+ [];
+encode_binary_to_string(<<B1:8>>) ->
+ [b64e(B1 bsr 2),
+ b64e((B1 band 3) bsl 4), $=, $=];
+encode_binary_to_string(<<B1:8, B2:8>>) ->
+ [b64e(B1 bsr 2),
+ b64e(((B1 band 3) bsl 4) bor (B2 bsr 4)),
+ b64e((B2 band 15) bsl 2), $=];
+encode_binary_to_string(<<B1:8, B2:8, B3:8, Ls/bits>>) ->
+ BB = (B1 bsl 16) bor (B2 bsl 8) bor B3,
+ [b64e(BB bsr 18),
+ b64e((BB bsr 12) band 63),
+ b64e((BB bsr 6) band 63),
+ b64e(BB band 63) | encode_binary_to_string(Ls)].
-encode_l([]) ->
+encode_list_to_string([]) ->
[];
-encode_l([A]) ->
- [b64e(A bsr 2),
- b64e((A band 3) bsl 4), $=, $=];
-encode_l([A,B]) ->
- [b64e(A bsr 2),
- b64e(((A band 3) bsl 4) bor (B bsr 4)),
- b64e((B band 15) bsl 2), $=];
-encode_l([A,B,C|Ls]) ->
- BB = (A bsl 16) bor (B bsl 8) bor C,
+encode_list_to_string([B1]) ->
+ [b64e(B1 bsr 2),
+ b64e((B1 band 3) bsl 4), $=, $=];
+encode_list_to_string([B1,B2]) ->
+ [b64e(B1 bsr 2),
+ b64e(((B1 band 3) bsl 4) bor (B2 bsr 4)),
+ b64e((B2 band 15) bsl 2), $=];
+encode_list_to_string([B1,B2,B3|Ls]) ->
+ BB = (B1 bsl 16) bor (B2 bsl 8) bor B3,
[b64e(BB bsr 18),
b64e((BB bsr 12) band 63),
b64e((BB bsr 6) band 63),
- b64e(BB band 63) | encode_l(Ls)].
-
-encode_binary(Bin) ->
- Split = 3*(byte_size(Bin) div 3),
- <<Main0:Split/binary,Rest/binary>> = Bin,
- Main = << <<(b64e(C)):8>> || <<C:6>> <= Main0 >>,
- case Rest of
- <<A:6,B:6,C:4>> ->
- <<Main/binary,(b64e(A)):8,(b64e(B)):8,(b64e(C bsl 2)):8,$=:8>>;
- <<A:6,B:2>> ->
- <<Main/binary,(b64e(A)):8,(b64e(B bsl 4)):8,$=:8,$=:8>>;
- <<>> ->
- Main
- end.
+ b64e(BB band 63) | encode_list_to_string(Ls)].
-%%-------------------------------------------------------------------------
-%% mime_decode(Base64) -> ASCII
-%% decode(Base64) -> ASCII
-%% Base64 - string() | binary()
-%% ASCII - binary()
-%%
-%% Description: Decodes an base64 encoded string to plain ASCII.
-%% mime_decode strips away all characters not Base64 before converting,
-%% whereas decode crashes if an illegal character is found
-%%-------------------------------------------------------------------------
+encode_binary(<<>>, A) ->
+ A;
+encode_binary(<<B1:8>>, A) ->
+ <<A/binary,(b64e(B1 bsr 2)):8,(b64e((B1 band 3) bsl 4)):8,$=:8,$=:8>>;
+encode_binary(<<B1:8, B2:8>>, A) ->
+ <<A/binary,(b64e(B1 bsr 2)):8,
+ (b64e(((B1 band 3) bsl 4) bor (B2 bsr 4))):8,
+ (b64e((B2 band 15) bsl 2)):8, $=:8>>;
+encode_binary(<<B1:8, B2:8, B3:8, Ls/bits>>, A) ->
+ BB = (B1 bsl 16) bor (B2 bsl 8) bor B3,
+ encode_binary(Ls,
+ <<A/binary,(b64e(BB bsr 18)):8,
+ (b64e((BB bsr 12) band 63)):8,
+ (b64e((BB bsr 6) band 63)):8,
+ (b64e(BB band 63)):8>>).
+
+encode_list([], A) ->
+ A;
+encode_list([B1], A) ->
+ <<A/binary,(b64e(B1 bsr 2)):8,(b64e((B1 band 3) bsl 4)):8,$=:8,$=:8>>;
+encode_list([B1,B2], A) ->
+ <<A/binary,(b64e(B1 bsr 2)):8,
+ (b64e(((B1 band 3) bsl 4) bor (B2 bsr 4))):8,
+ (b64e((B2 band 15) bsl 2)):8, $=:8>>;
+encode_list([B1,B2,B3|Ls], A) ->
+ BB = (B1 bsl 16) bor (B2 bsl 8) bor B3,
+ encode_list(Ls,
+ <<A/binary,(b64e(BB bsr 18)):8,
+ (b64e((BB bsr 12) band 63)):8,
+ (b64e((BB bsr 6) band 63)):8,
+ (b64e(BB band 63)):8>>).
+
+%% mime_decode strips away all characters not Base64 before
+%% converting, whereas decode crashes if an illegal character is found
-spec decode(Base64) -> Data when
Base64 :: ascii_string() | ascii_binary(),
@@ -122,158 +128,280 @@ decode(List) when is_list(List) ->
Data :: ascii_binary().
mime_decode(Bin) when is_binary(Bin) ->
- mime_decode_binary(<<>>, Bin);
+ mime_decode_binary(Bin, <<>>);
mime_decode(List) when is_list(List) ->
- mime_decode(list_to_binary(List)).
+ mime_decode_list(List, <<>>).
--spec decode_l(ascii_string()) -> ascii_string().
-
-decode_l(List) ->
- L = strip_spaces(List, []),
- decode(L, []).
-
--spec mime_decode_l(ascii_string()) -> ascii_string().
-
-mime_decode_l(List) ->
- L = strip_illegal(List, [], 0),
- decode(L, []).
-
-%%-------------------------------------------------------------------------
-%% mime_decode_to_string(Base64) -> ASCII
-%% decode_to_string(Base64) -> ASCII
-%% Base64 - string() | binary()
-%% ASCII - binary()
-%%
-%% Description: Decodes an base64 encoded string to plain ASCII.
-%% mime_decode strips away all characters not Base64 before converting,
-%% whereas decode crashes if an illegal character is found
-%%-------------------------------------------------------------------------
+%% mime_decode_to_string strips away all characters not Base64 before
+%% converting, whereas decode_to_string crashes if an illegal
+%% character is found
-spec decode_to_string(Base64) -> DataString when
Base64 :: ascii_string() | ascii_binary(),
DataString :: ascii_string().
decode_to_string(Bin) when is_binary(Bin) ->
- decode_to_string(binary_to_list(Bin));
+ decode_binary_to_string(Bin);
decode_to_string(List) when is_list(List) ->
- decode_l(List).
+ decode_list_to_string(List).
-spec mime_decode_to_string(Base64) -> DataString when
Base64 :: ascii_string() | ascii_binary(),
DataString :: ascii_string().
mime_decode_to_string(Bin) when is_binary(Bin) ->
- mime_decode_to_string(binary_to_list(Bin));
+ mime_decode_binary_to_string(Bin);
mime_decode_to_string(List) when is_list(List) ->
- mime_decode_l(List).
-
-%% One-based decode map.
--define(DECODE_MAP,
- {bad,bad,bad,bad,bad,bad,bad,bad,ws,ws,bad,bad,ws,bad,bad, %1-15
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, %16-31
- ws,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,62,bad,bad,bad,63, %32-47
- 52,53,54,55,56,57,58,59,60,61,bad,bad,bad,eq,bad,bad, %48-63
- bad,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
- 15,16,17,18,19,20,21,22,23,24,25,bad,bad,bad,bad,bad,
- bad,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
- 41,42,43,44,45,46,47,48,49,50,51,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}).
+ mime_decode_list_to_string(List).
-decode_binary(<<C1:8, Cs/bits>>, A) ->
- case element(C1, ?DECODE_MAP) of
- ws -> decode_binary(Cs, A);
- B1 -> decode_binary(Cs, A, B1)
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
+mime_decode_list([0 | Cs], A) ->
+ mime_decode_list(Cs, A);
+mime_decode_list([C1 | Cs], A) ->
+ case b64d(C1) of
+ B1 when is_integer(B1) -> mime_decode_list(Cs, A, B1);
+ _ -> mime_decode_list(Cs, A) % eq is padding
end;
-decode_binary(<<>>, A) ->
+mime_decode_list([], A) ->
A.
-decode_binary(<<C2:8, Cs/bits>>, A, B1) ->
- case element(C2, ?DECODE_MAP) of
- ws -> decode_binary(Cs, A, B1);
- B2 -> decode_binary(Cs, A, B1, B2)
+mime_decode_list([0 | Cs], A, B1) ->
+ mime_decode_list(Cs, A, B1);
+mime_decode_list([C2 | Cs], A, B1) ->
+ case b64d(C2) of
+ B2 when is_integer(B2) ->
+ mime_decode_list(Cs, A, B1, B2);
+ _ -> mime_decode_list(Cs, A, B1) % eq is padding
end.
-decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) ->
- case element(C3, ?DECODE_MAP) of
- ws -> decode_binary(Cs, A, B1, B2);
- B3 -> decode_binary(Cs, A, B1, B2, B3)
+mime_decode_list([0 | Cs], A, B1, B2) ->
+ mime_decode_list(Cs, A, B1, B2);
+mime_decode_list([C3 | Cs], A, B1, B2) ->
+ case b64d(C3) of
+ B3 when is_integer(B3) ->
+ mime_decode_list(Cs, A, B1, B2, B3);
+ eq=B3 ->
+ mime_decode_list_after_eq(Cs, A, B1, B2, B3);
+ _ -> mime_decode_list(Cs, A, B1, B2)
end.
-decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) ->
- case element(C4, ?DECODE_MAP) of
- ws -> decode_binary(Cs, A, B1, B2, B3);
- eq when B3 =:= eq -> only_ws_binary(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>);
- eq -> only_ws_binary(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>);
- B4 -> decode_binary(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>)
+mime_decode_list([0 | Cs], A, B1, B2, B3) ->
+ mime_decode_list(Cs, A, B1, B2, B3);
+mime_decode_list([C4 | Cs], A, B1, B2, B3) ->
+ case b64d(C4) of
+ B4 when is_integer(B4) ->
+ mime_decode_list(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>);
+ eq ->
+ mime_decode_list_after_eq(Cs, A, B1, B2, B3);
+ _ -> mime_decode_list(Cs, A, B1, B2, B3)
end.
-only_ws_binary(<<>>, A) ->
- A;
-only_ws_binary(<<C:8, Cs/bits>>, A) ->
- case element(C, ?DECODE_MAP) of
- ws -> only_ws_binary(Cs, A);
- _ -> erlang:error(function_clause)
+mime_decode_list_after_eq([0 | Cs], A, B1, B2, B3) ->
+ mime_decode_list_after_eq(Cs, A, B1, B2, B3);
+mime_decode_list_after_eq([C | Cs], A, B1, B2, B3) ->
+ case b64d(C) of
+ B when is_integer(B) ->
+ %% More valid data, skip the eq as invalid
+ case B3 of
+ eq -> mime_decode_list(Cs, A, B1, B2, B);
+ _ -> mime_decode_list(Cs, <<A/bits,B1:6,B2:6,B3:6,B:6>>)
+ end;
+ _ -> mime_decode_list_after_eq(Cs, A, B1, B2, B3)
+ end;
+mime_decode_list_after_eq([], A, B1, B2, eq) ->
+ <<A/binary,B1:6,(B2 bsr 4):2>>;
+mime_decode_list_after_eq([], A, B1, B2, B3) ->
+ <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>.
+
+mime_decode_binary(<<0:8, Cs/bits>>, A) ->
+ mime_decode_binary(Cs, A);
+mime_decode_binary(<<C1:8, Cs/bits>>, A) ->
+ case b64d(C1) of
+ B1 when is_integer(B1) -> mime_decode_binary(Cs, A, B1);
+ _ -> mime_decode_binary(Cs, A) % eq is padding
+ end;
+mime_decode_binary(<<>>, A) ->
+ A.
+
+mime_decode_binary(<<0:8, Cs/bits>>, A, B1) ->
+ mime_decode_binary(Cs, A, B1);
+mime_decode_binary(<<C2:8, Cs/bits>>, A, B1) ->
+ case b64d(C2) of
+ B2 when is_integer(B2) ->
+ mime_decode_binary(Cs, A, B1, B2);
+ _ -> mime_decode_binary(Cs, A, B1) % eq is padding
end.
-%% Skipping pad character if not at end of string. Also liberal about
-%% excess padding and skipping of other illegal (non-base64 alphabet)
-%% characters. See section 3.3 of RFC4648
-mime_decode_binary(Result, <<0:8,T/bits>>) ->
- mime_decode_binary(Result, T);
-mime_decode_binary(Result0, <<C:8,T/bits>>) ->
- case element(C, ?DECODE_MAP) of
- Bits when is_integer(Bits) ->
- mime_decode_binary(<<Result0/bits,Bits:6>>, T);
+mime_decode_binary(<<0:8, Cs/bits>>, A, B1, B2) ->
+ mime_decode_binary(Cs, A, B1, B2);
+mime_decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) ->
+ case b64d(C3) of
+ B3 when is_integer(B3) ->
+ mime_decode_binary(Cs, A, B1, B2, B3);
+ eq=B3 ->
+ mime_decode_binary_after_eq(Cs, A, B1, B2, B3);
+ _ -> mime_decode_binary(Cs, A, B1, B2)
+ end.
+
+mime_decode_binary(<<0:8, Cs/bits>>, A, B1, B2, B3) ->
+ mime_decode_binary(Cs, A, B1, B2, B3);
+mime_decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) ->
+ case b64d(C4) of
+ B4 when is_integer(B4) ->
+ mime_decode_binary(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>);
eq ->
- mime_decode_binary_after_eq(Result0, T, false);
- _ ->
- mime_decode_binary(Result0, T)
+ mime_decode_binary_after_eq(Cs, A, B1, B2, B3);
+ _ -> mime_decode_binary(Cs, A, B1, B2, B3)
+ end.
+
+mime_decode_binary_after_eq(<<0:8, Cs/bits>>, A, B1, B2, B3) ->
+ mime_decode_binary_after_eq(Cs, A, B1, B2, B3);
+mime_decode_binary_after_eq(<<C:8, Cs/bits>>, A, B1, B2, B3) ->
+ case b64d(C) of
+ B when is_integer(B) ->
+ %% More valid data, skip the eq as invalid
+ case B3 of
+ eq -> mime_decode_binary(Cs, A, B1, B2, B);
+ _ -> mime_decode_binary(Cs, <<A/bits,B1:6,B2:6,B3:6,B:6>>)
+ end;
+ _ -> mime_decode_binary_after_eq(Cs, A, B1, B2, B3)
end;
-mime_decode_binary(Result, _) ->
- true = is_binary(Result),
- Result.
-
-mime_decode_binary_after_eq(Result, <<0:8,T/bits>>, Eq) ->
- mime_decode_binary_after_eq(Result, T, Eq);
-mime_decode_binary_after_eq(Result0, <<C:8,T/bits>>, Eq) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- mime_decode_binary_after_eq(Result0, T, Eq);
- ws ->
- mime_decode_binary_after_eq(Result0, T, Eq);
+mime_decode_binary_after_eq(<<>>, A, B1, B2, eq) ->
+ <<A/binary,B1:6,(B2 bsr 4):2>>;
+mime_decode_binary_after_eq(<<>>, A, B1, B2, B3) ->
+ <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>.
+
+mime_decode_list_to_string([0 | Cs]) ->
+ mime_decode_list_to_string(Cs);
+mime_decode_list_to_string([C1 | Cs]) ->
+ case b64d(C1) of
+ B1 when is_integer(B1) -> mime_decode_list_to_string(Cs, B1);
+ _ -> mime_decode_list_to_string(Cs) % eq is padding
+ end;
+mime_decode_list_to_string([]) ->
+ [].
+
+mime_decode_list_to_string([0 | Cs], B1) ->
+ mime_decode_list_to_string(Cs, B1);
+mime_decode_list_to_string([C2 | Cs], B1) ->
+ case b64d(C2) of
+ B2 when is_integer(B2) ->
+ mime_decode_list_to_string(Cs, B1, B2);
+ _ -> mime_decode_list_to_string(Cs, B1) % eq is padding
+ end.
+
+mime_decode_list_to_string([0 | Cs], B1, B2) ->
+ mime_decode_list_to_string(Cs, B1, B2);
+mime_decode_list_to_string([C3 | Cs], B1, B2) ->
+ case b64d(C3) of
+ B3 when is_integer(B3) ->
+ mime_decode_list_to_string(Cs, B1, B2, B3);
+ eq=B3 -> mime_decode_list_to_string_after_eq(Cs, B1, B2, B3);
+ _ -> mime_decode_list_to_string(Cs, B1, B2)
+ end.
+
+mime_decode_list_to_string([0 | Cs], B1, B2, B3) ->
+ mime_decode_list_to_string(Cs, B1, B2, B3);
+mime_decode_list_to_string([C4 | Cs], B1, B2, B3) ->
+ case b64d(C4) of
+ B4 when is_integer(B4) ->
+ Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4,
+ Octet1 = Bits4x6 bsr 16,
+ Octet2 = (Bits4x6 bsr 8) band 16#ff,
+ Octet3 = Bits4x6 band 16#ff,
+ [Octet1, Octet2, Octet3 | mime_decode_list_to_string(Cs)];
eq ->
- mime_decode_binary_after_eq(Result0, T, true);
- Bits when is_integer(Bits) ->
+ mime_decode_list_to_string_after_eq(Cs, B1, B2, B3);
+ _ -> mime_decode_list_to_string(Cs, B1, B2, B3)
+ end.
+
+mime_decode_list_to_string_after_eq([0 | Cs], B1, B2, B3) ->
+ mime_decode_list_to_string_after_eq(Cs, B1, B2, B3);
+mime_decode_list_to_string_after_eq([C | Cs], B1, B2, B3) ->
+ case b64d(C) of
+ B when is_integer(B) ->
%% More valid data, skip the eq as invalid
- mime_decode_binary(<<Result0/bits,Bits:6>>, T)
+ case B3 of
+ eq -> mime_decode_list_to_string(Cs, B1, B2, B);
+ _ ->
+ Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B,
+ Octet1 = Bits4x6 bsr 16,
+ Octet2 = (Bits4x6 bsr 8) band 16#ff,
+ Octet3 = Bits4x6 band 16#ff,
+ [Octet1, Octet2, Octet3 | mime_decode_list_to_string(Cs)]
+ end;
+ _ -> mime_decode_list_to_string_after_eq(Cs, B1, B2, B3)
+ end;
+mime_decode_list_to_string_after_eq([], B1, B2, eq) ->
+ binary_to_list(<<B1:6,(B2 bsr 4):2>>);
+mime_decode_list_to_string_after_eq([], B1, B2, B3) ->
+ binary_to_list(<<B1:6,B2:6,(B3 bsr 2):4>>).
+
+mime_decode_binary_to_string(<<0:8, Cs/bits>>) ->
+ mime_decode_binary_to_string(Cs);
+mime_decode_binary_to_string(<<C1:8, Cs/bits>>) ->
+ case b64d(C1) of
+ B1 when is_integer(B1) -> mime_decode_binary_to_string(Cs, B1);
+ _ -> mime_decode_binary_to_string(Cs) % eq is padding
end;
-mime_decode_binary_after_eq(Result0, <<>>, Eq) ->
- %% No more valid data.
- case bit_size(Result0) rem 8 of
- 0 ->
- %% '====' is not uncommon.
- Result0;
- 4 when Eq ->
- %% enforce at least one more '=' only ignoring illegals and spacing
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:4>> = Result0,
- Result;
- 2 ->
- %% remove 2 bits
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:2>> = Result0,
- Result
+mime_decode_binary_to_string(<<>>) ->
+ [].
+
+mime_decode_binary_to_string(<<0:8, Cs/bits>>, B1) ->
+ mime_decode_binary_to_string(Cs, B1);
+mime_decode_binary_to_string(<<C2:8, Cs/bits>>, B1) ->
+ case b64d(C2) of
+ B2 when is_integer(B2) ->
+ mime_decode_binary_to_string(Cs, B1, B2);
+ _ -> mime_decode_binary_to_string(Cs, B1) % eq is padding
+ end.
+
+mime_decode_binary_to_string(<<0:8, Cs/bits>>, B1, B2) ->
+ mime_decode_binary_to_string(Cs, B1, B2);
+mime_decode_binary_to_string(<<C3:8, Cs/bits>>, B1, B2) ->
+ case b64d(C3) of
+ B3 when is_integer(B3) ->
+ mime_decode_binary_to_string(Cs, B1, B2, B3);
+ eq=B3 -> mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3);
+ _ -> mime_decode_binary_to_string(Cs, B1, B2)
+ end.
+
+mime_decode_binary_to_string(<<0:8, Cs/bits>>, B1, B2, B3) ->
+ mime_decode_binary_to_string(Cs, B1, B2, B3);
+mime_decode_binary_to_string(<<C4:8, Cs/bits>>, B1, B2, B3) ->
+ case b64d(C4) of
+ B4 when is_integer(B4) ->
+ Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4,
+ Octet1 = Bits4x6 bsr 16,
+ Octet2 = (Bits4x6 bsr 8) band 16#ff,
+ Octet3 = Bits4x6 band 16#ff,
+ [Octet1, Octet2, Octet3 | mime_decode_binary_to_string(Cs)];
+ eq ->
+ mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3);
+ _ -> mime_decode_binary_to_string(Cs, B1, B2, B3)
end.
+mime_decode_binary_to_string_after_eq(<<0:8, Cs/bits>>, B1, B2, B3) ->
+ mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3);
+mime_decode_binary_to_string_after_eq(<<C:8, Cs/bits>>=Cs0, B1, B2, B3) ->
+ case b64d(C) of
+ B when is_integer(B) ->
+ %% More valid data, skip the eq as invalid
+ case B3 of
+ eq -> mime_decode_binary_to_string(Cs, B1, B2, B);
+ _ -> mime_decode_binary_to_string(Cs0, B1, B2, B3)
+ end;
+ _ -> mime_decode_binary_to_string_after_eq(Cs, B1, B2, B3)
+ end;
+mime_decode_binary_to_string_after_eq(<<>>, B1, B2, eq) ->
+ binary_to_list(<<B1:6,(B2 bsr 4):2>>);
+mime_decode_binary_to_string_after_eq(<<>>, B1, B2, B3) ->
+ binary_to_list(<<B1:6,B2:6,(B3 bsr 2):4>>).
+
decode_list([C1 | Cs], A) ->
- case element(C1, ?DECODE_MAP) of
+ case b64d(C1) of
ws -> decode_list(Cs, A);
B1 -> decode_list(Cs, A, B1)
end;
@@ -281,122 +409,167 @@ decode_list([], A) ->
A.
decode_list([C2 | Cs], A, B1) ->
- case element(C2, ?DECODE_MAP) of
+ case b64d(C2) of
ws -> decode_list(Cs, A, B1);
B2 -> decode_list(Cs, A, B1, B2)
end.
decode_list([C3 | Cs], A, B1, B2) ->
- case element(C3, ?DECODE_MAP) of
+ case b64d(C3) of
ws -> decode_list(Cs, A, B1, B2);
B3 -> decode_list(Cs, A, B1, B2, B3)
end.
decode_list([C4 | Cs], A, B1, B2, B3) ->
- case element(C4, ?DECODE_MAP) of
+ case b64d(C4) of
ws -> decode_list(Cs, A, B1, B2, B3);
eq when B3 =:= eq -> only_ws(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>);
eq -> only_ws(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>);
B4 -> decode_list(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>)
end.
+decode_binary(<<C1:8, Cs/bits>>, A) ->
+ case b64d(C1) of
+ ws -> decode_binary(Cs, A);
+ B1 -> decode_binary(Cs, A, B1)
+ end;
+decode_binary(<<>>, A) ->
+ A.
+
+decode_binary(<<C2:8, Cs/bits>>, A, B1) ->
+ case b64d(C2) of
+ ws -> decode_binary(Cs, A, B1);
+ B2 -> decode_binary(Cs, A, B1, B2)
+ end.
+
+decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) ->
+ case b64d(C3) of
+ ws -> decode_binary(Cs, A, B1, B2);
+ B3 -> decode_binary(Cs, A, B1, B2, B3)
+ end.
+
+decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) ->
+ case b64d(C4) of
+ ws -> decode_binary(Cs, A, B1, B2, B3);
+ eq when B3 =:= eq -> only_ws_binary(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>);
+ eq -> only_ws_binary(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>);
+ B4 -> decode_binary(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>)
+ end.
+
+decode_list_to_string([C1 | Cs]) ->
+ case b64d(C1) of
+ ws -> decode_list_to_string(Cs);
+ B1 -> decode_list_to_string(Cs, B1)
+ end;
+decode_list_to_string([]) ->
+ [].
+
+decode_list_to_string([C2 | Cs], B1) ->
+ case b64d(C2) of
+ ws -> decode_list_to_string(Cs, B1);
+ B2 -> decode_list_to_string(Cs, B1, B2)
+ end.
+
+decode_list_to_string([C3 | Cs], B1, B2) ->
+ case b64d(C3) of
+ ws -> decode_list_to_string(Cs, B1, B2);
+ B3 -> decode_list_to_string(Cs, B1, B2, B3)
+ end.
+
+decode_list_to_string([C4 | Cs], B1, B2, B3) ->
+ case b64d(C4) of
+ ws ->
+ decode_list_to_string(Cs, B1, B2, B3);
+ eq when B3 =:= eq ->
+ only_ws(Cs, binary_to_list(<<B1:6,(B2 bsr 4):2>>));
+ eq ->
+ only_ws(Cs, binary_to_list(<<B1:6,B2:6,(B3 bsr 2):4>>));
+ B4 ->
+ Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4,
+ Octet1 = Bits4x6 bsr 16,
+ Octet2 = (Bits4x6 bsr 8) band 16#ff,
+ Octet3 = Bits4x6 band 16#ff,
+ [Octet1, Octet2, Octet3 | decode_list_to_string(Cs)]
+ end.
+
only_ws([], A) ->
A;
only_ws([C | Cs], A) ->
- case element(C, ?DECODE_MAP) of
+ case b64d(C) of
ws -> only_ws(Cs, A);
_ -> erlang:error(function_clause)
end.
-decode([], A) -> A;
-decode([$=,$=,C2,C1|Cs], A) ->
- Bits2x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12),
- Octet1 = Bits2x6 bsr 16,
- decode(Cs, [Octet1|A]);
-decode([$=,C3,C2,C1|Cs], A) ->
- Bits3x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12)
- bor (b64d(C3) bsl 6),
- Octet1 = Bits3x6 bsr 16,
- Octet2 = (Bits3x6 bsr 8) band 16#ff,
- decode(Cs, [Octet1,Octet2|A]);
-decode([C4,C3,C2,C1| Cs], A) ->
- Bits4x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12)
- bor (b64d(C3) bsl 6) bor b64d(C4),
- Octet1 = Bits4x6 bsr 16,
- Octet2 = (Bits4x6 bsr 8) band 16#ff,
- Octet3 = Bits4x6 band 16#ff,
- decode(Cs, [Octet1,Octet2,Octet3|A]).
+decode_binary_to_string(<<C1:8, Cs/bits>>) ->
+ case b64d(C1) of
+ ws -> decode_binary_to_string(Cs);
+ B1 -> decode_binary_to_string(Cs, B1)
+ end;
+decode_binary_to_string(<<>>) ->
+ [].
-%%%========================================================================
-%%% Internal functions
-%%%========================================================================
+decode_binary_to_string(<<C2:8, Cs/bits>>, B1) ->
+ case b64d(C2) of
+ ws -> decode_binary_to_string(Cs, B1);
+ B2 -> decode_binary_to_string(Cs, B1, B2)
+ end.
-strip_spaces([], A) -> A;
-strip_spaces([$\s|Cs], A) -> strip_spaces(Cs, A);
-strip_spaces([$\t|Cs], A) -> strip_spaces(Cs, A);
-strip_spaces([$\r|Cs], A) -> strip_spaces(Cs, A);
-strip_spaces([$\n|Cs], A) -> strip_spaces(Cs, A);
-strip_spaces([C|Cs], A) -> strip_spaces(Cs, [C | A]).
+decode_binary_to_string(<<C3:8, Cs/bits>>, B1, B2) ->
+ case b64d(C3) of
+ ws -> decode_binary_to_string(Cs, B1, B2);
+ B3 -> decode_binary_to_string(Cs, B1, B2, B3)
+ end.
-%% Skipping pad character if not at end of string. Also liberal about
-%% excess padding and skipping of other illegal (non-base64 alphabet)
-%% characters. See section 3.3 of RFC4648
-strip_illegal([], A, _Cnt) ->
- A;
-strip_illegal([0|Cs], A, Cnt) ->
- strip_illegal(Cs, A, Cnt);
-strip_illegal([C|Cs], A, Cnt) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- strip_illegal(Cs, A, Cnt);
- ws ->
- strip_illegal(Cs, A, Cnt);
- eq ->
- case {tail_contains_more(Cs, false), Cnt rem 4} of
- {{[], _}, 0} ->
- A; %% Ignore extra =
- {{[], true}, 2} ->
- [$=|[$=|A]]; %% 'XX=='
- {{[], _}, 3} ->
- [$=|A]; %% 'XXX='
- {{[H|T], _}, _} ->
- %% more data, skip equals
- strip_illegal(T, [H|A], Cnt+1)
- end;
- _ ->
- strip_illegal(Cs, [C|A], Cnt+1)
+decode_binary_to_string(<<C4:8, Cs/bits>>, B1, B2, B3) ->
+ case b64d(C4) of
+ ws -> decode_binary_to_string(Cs, B1, B2, B3);
+ eq when B3 =:= eq ->
+ only_ws_binary(Cs, binary_to_list(<<B1:6,(B2 bsr 4):2>>));
+ eq ->
+ only_ws_binary(Cs, binary_to_list(<<B1:6,B2:6,(B3 bsr 2):4>>));
+ B4 ->
+ Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4,
+ Octet1 = Bits4x6 bsr 16,
+ Octet2 = (Bits4x6 bsr 8) band 16#ff,
+ Octet3 = Bits4x6 band 16#ff,
+ [Octet1, Octet2, Octet3 | decode_binary_to_string(Cs)]
end.
-%% Search the tail for more valid data and remember if we saw
-%% another equals along the way.
-tail_contains_more([], Eq) ->
- {[], Eq};
-tail_contains_more(<<>>, Eq) ->
- {<<>>, Eq};
-tail_contains_more([C|T]=More, Eq) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- tail_contains_more(T, Eq);
- ws ->
- tail_contains_more(T, Eq);
- eq ->
- tail_contains_more(T, true);
- _ ->
- {More, Eq}
- end;
-tail_contains_more(<<C:8,T/bits>> =More, Eq) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- tail_contains_more(T, Eq);
- ws ->
- tail_contains_more(T, Eq);
- eq ->
- tail_contains_more(T, true);
- _ ->
- {More, Eq}
+only_ws_binary(<<>>, A) ->
+ A;
+only_ws_binary(<<C:8, Cs/bits>>, A) ->
+ case b64d(C) of
+ ws -> only_ws_binary(Cs, A);
+ _ -> erlang:error(function_clause)
end.
-
+
+%%%========================================================================
+%%% Internal functions
+%%%========================================================================
+
%% accessors
+-compile({inline, [{b64d, 1}]}).
+%% One-based decode map.
+b64d(X) ->
+ element(X,
+ {bad,bad,bad,bad,bad,bad,bad,bad,ws,ws,bad,bad,ws,bad,bad, %1-15
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, %16-31
+ ws,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,62,bad,bad,bad,63, %32-47
+ 52,53,54,55,56,57,58,59,60,61,bad,bad,bad,eq,bad,bad, %48-63
+ bad,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
+ 15,16,17,18,19,20,21,22,23,24,25,bad,bad,bad,bad,bad,
+ bad,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
+ 41,42,43,44,45,46,47,48,49,50,51,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}).
+
+-compile({inline, [{b64e, 1}]}).
b64e(X) ->
element(X+1,
{$A, $B, $C, $D, $E, $F, $G, $H, $I, $J, $K, $L, $M, $N,
@@ -404,9 +577,3 @@ b64e(X) ->
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n,
$o, $p, $q, $r, $s, $t, $u, $v, $w, $x, $y, $z,
$0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $+, $/}).
-
-
-b64d(X) ->
- b64d_ok(element(X, ?DECODE_MAP)).
-
-b64d_ok(I) when is_integer(I) -> I.