From 3d28da78f3e8bab7a13f4f928cdd7747ab943197 Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Mon, 30 Oct 2017 11:55:42 +0100 Subject: stdlib: Optimize base64 functions A few test cases with zeroes are added. They were not handled correctly before. The access of DECODE_MAP is moved into the inlined function b64d, for symmetry. The function b64e is also inlined. The speed-up is small, but measurable. Note: encode(List), decode(List) and mime_decode(List) no longer call list_to_binary. This can break code that calls the functions with I/O-lists as input. --- lib/stdlib/test/base64_SUITE.erl | 96 +++++++++++++++++++++++----------------- 1 file changed, 56 insertions(+), 40 deletions(-) (limited to 'lib/stdlib/test') diff --git a/lib/stdlib/test/base64_SUITE.erl b/lib/stdlib/test/base64_SUITE.erl index 48b3f5f959..1715b2ebe6 100644 --- a/lib/stdlib/test/base64_SUITE.erl +++ b/lib/stdlib/test/base64_SUITE.erl @@ -119,44 +119,51 @@ illegal(Config) when is_list(Config) -> ok. %%------------------------------------------------------------------------- %% mime_decode and mime_decode_to_string have different implementations -%% so test both with the same input separately. Both functions have -%% the same implementation for binary/string arguments. +%% so test both with the same input separately. %% %% Test base64:mime_decode/1. mime_decode(Config) when is_list(Config) -> + MimeDecode = fun(In) -> + Out = base64:mime_decode(In), + Out = base64:mime_decode(binary_to_list(In)) + end, %% Test correct padding - <<"one">> = base64:mime_decode(<<"b25l">>), - <<"on">> = base64:mime_decode(<<"b24=">>), - <<"o">> = base64:mime_decode(<<"bw==">>), + <<"one">> = MimeDecode(<<"b25l">>), + <<"on">> = MimeDecode(<<"b24=">>), + <<"o">> = MimeDecode(<<"bw==">>), %% Test 1 extra padding - <<"one">> = base64:mime_decode(<<"b25l= =">>), - <<"on">> = base64:mime_decode(<<"b24== =">>), - <<"o">> = base64:mime_decode(<<"bw=== =">>), + <<"one">> = MimeDecode(<<"b25l= =">>), + <<"on">> = MimeDecode(<<"b24== =">>), + <<"o">> = MimeDecode(<<"bw=== =">>), %% Test 2 extra padding - <<"one">> = base64:mime_decode(<<"b25l===">>), - <<"on">> = base64:mime_decode(<<"b24====">>), - <<"o">> = base64:mime_decode(<<"bw=====">>), + <<"one">> = MimeDecode(<<"b25l===">>), + <<"on">> = MimeDecode(<<"b24====">>), + <<"o">> = MimeDecode(<<"bw=====">>), %% Test misc embedded padding - <<"one">> = base64:mime_decode(<<"b2=5l===">>), - <<"on">> = base64:mime_decode(<<"b=24====">>), - <<"o">> = base64:mime_decode(<<"b=w=====">>), + <<"one">> = MimeDecode(<<"b2=5l===">>), + <<"on">> = MimeDecode(<<"b=24====">>), + <<"o">> = MimeDecode(<<"b=w=====">>), %% Test misc white space and illegals with embedded padding - <<"one">> = base64:mime_decode(<<" b~2=\r\n5()l===">>), - <<"on">> = base64:mime_decode(<<"\tb =2\"¤4=¤= ==">>), - <<"o">> = base64:mime_decode(<<"\nb=w=====">>), + <<"one">> = MimeDecode(<<" b~2=\r\n5()l===">>), + <<"on">> = MimeDecode(<<"\tb =2\"¤4=¤= ==">>), + <<"o">> = MimeDecode(<<"\nb=w=====">>), %% Two pads <<"Aladdin:open sesame">> = - base64:mime_decode("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="), + MimeDecode(<<"QWxhZGRpbjpvc()GVuIHNlc2FtZQ==">>), %% One pad to ignore, followed by more text - <<"Hello World!!">> = base64:mime_decode(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), + <<"Hello World!!">> = MimeDecode(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), %% No pad <<"Aladdin:open sesam">> = - base64:mime_decode("QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft"), + MimeDecode(<<"QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft">>), %% Encoded base 64 strings may be divided by non base 64 chars. %% In this cases whitespaces. <<"0123456789!@#0^&*();:<>,. []{}">> = - base64:mime_decode( + MimeDecode( <<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>), + %% Zeroes + <<"012">> = MimeDecode(<<"\000M\000D\000E\000y=\000">>), + <<"o">> = MimeDecode(<<"bw==\000">>), + <<"o">> = MimeDecode(<<"bw=\000=">>), ok. %%------------------------------------------------------------------------- @@ -165,39 +172,48 @@ mime_decode(Config) when is_list(Config) -> %% Test base64:mime_decode_to_string/1. mime_decode_to_string(Config) when is_list(Config) -> + MimeDecodeToString = + fun(In) -> + Out = base64:mime_decode_to_string(In), + Out = base64:mime_decode_to_string(binary_to_list(In)) + end, %% Test correct padding - "one" = base64:mime_decode_to_string(<<"b25l">>), - "on" = base64:mime_decode_to_string(<<"b24=">>), - "o" = base64:mime_decode_to_string(<<"bw==">>), + "one" = MimeDecodeToString(<<"b25l">>), + "on" = MimeDecodeToString(<<"b24=">>), + "o" = MimeDecodeToString(<<"bw==">>), %% Test 1 extra padding - "one" = base64:mime_decode_to_string(<<"b25l= =">>), - "on" = base64:mime_decode_to_string(<<"b24== =">>), - "o" = base64:mime_decode_to_string(<<"bw=== =">>), + "one" = MimeDecodeToString(<<"b25l= =">>), + "on" = MimeDecodeToString(<<"b24== =">>), + "o" = MimeDecodeToString(<<"bw=== =">>), %% Test 2 extra padding - "one" = base64:mime_decode_to_string(<<"b25l===">>), - "on" = base64:mime_decode_to_string(<<"b24====">>), - "o" = base64:mime_decode_to_string(<<"bw=====">>), + "one" = MimeDecodeToString(<<"b25l===">>), + "on" = MimeDecodeToString(<<"b24====">>), + "o" = MimeDecodeToString(<<"bw=====">>), %% Test misc embedded padding - "one" = base64:mime_decode_to_string(<<"b2=5l===">>), - "on" = base64:mime_decode_to_string(<<"b=24====">>), - "o" = base64:mime_decode_to_string(<<"b=w=====">>), + "one" = MimeDecodeToString(<<"b2=5l===">>), + "on" = MimeDecodeToString(<<"b=24====">>), + "o" = MimeDecodeToString(<<"b=w=====">>), %% Test misc white space and illegals with embedded padding - "one" = base64:mime_decode_to_string(<<" b~2=\r\n5()l===">>), - "on" = base64:mime_decode_to_string(<<"\tb =2\"¤4=¤= ==">>), - "o" = base64:mime_decode_to_string(<<"\nb=w=====">>), + "one" = MimeDecodeToString(<<" b~2=\r\n5()l===">>), + "on" = MimeDecodeToString(<<"\tb =2\"¤4=¤= ==">>), + "o" = MimeDecodeToString(<<"\nb=w=====">>), %% Two pads "Aladdin:open sesame" = - base64:mime_decode_to_string("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="), + MimeDecodeToString(<<"QWxhZGRpbjpvc()GVuIHNlc2FtZQ==">>), %% One pad to ignore, followed by more text - "Hello World!!" = base64:mime_decode_to_string(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), + "Hello World!!" = MimeDecodeToString(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>), %% No pad "Aladdin:open sesam" = - base64:mime_decode_to_string("QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft"), + MimeDecodeToString(<<"QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft">>), %% Encoded base 64 strings may be divided by non base 64 chars. %% In this cases whitespaces. "0123456789!@#0^&*();:<>,. []{}" = - base64:mime_decode_to_string( + MimeDecodeToString( <<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>), + %% Zeroes + "012" = MimeDecodeToString(<<"\000M\000D\000E\000y=\000">>), + "o" = MimeDecodeToString(<<"bw==\000">>), + "o" = MimeDecodeToString(<<"bw=\000=">>), ok. %%------------------------------------------------------------------------- -- cgit v1.2.3 From 2ead5d429fe87ffedf0134d918c3b404e9fa70fe Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Wed, 8 Nov 2017 12:41:23 +0100 Subject: stdlib: Do not check base64 input more than needed Often the decode functions return a function_clause error, but not always, and the errors have not been consistent between modifications of the base64 module. Now the errors are returned as they happen--no attempt to make them look nice is done. The alternative, to ensure that, for example, {badarg, Culprit} is always returned upon bad input, was deemed pointless. --- lib/stdlib/test/base64_SUITE.erl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'lib/stdlib/test') diff --git a/lib/stdlib/test/base64_SUITE.erl b/lib/stdlib/test/base64_SUITE.erl index 1715b2ebe6..1fc4c3fc0e 100644 --- a/lib/stdlib/test/base64_SUITE.erl +++ b/lib/stdlib/test/base64_SUITE.erl @@ -97,10 +97,9 @@ base64_otp_5635(Config) when is_list(Config) -> <<"===">> = base64:decode(base64:encode("===")), ok. %%------------------------------------------------------------------------- -%% OTP-6279: Guard needed so that function fails in a correct -%% way for faulty input, i.e. function_clause. +%% OTP-6279: Make sure illegal characters are rejected when decoding. base64_otp_6279(Config) when is_list(Config) -> - {'EXIT',{function_clause, _}} = (catch base64:decode("dGVzda==a")), + {'EXIT',_} = (catch base64:decode("dGVzda==a")), ok. %%------------------------------------------------------------------------- %% Encode and decode big binaries. @@ -115,7 +114,13 @@ big(Config) when is_list(Config) -> %%------------------------------------------------------------------------- %% Make sure illegal characters are rejected when decoding. illegal(Config) when is_list(Config) -> - {'EXIT',{function_clause, _}} = (catch base64:decode("()")), + %% A few samples with different error reasons. Nothing can be + %% assumed about the reason for the crash. + {'EXIT',_} = (catch base64:decode("()")), + {'EXIT',_} = (catch base64:decode(<<19:8,20:8,21:8,22:8>>)), + {'EXIT',_} = (catch base64:decode([19,20,21,22])), + {'EXIT',_} = (catch base64:decode_to_string(<<19:8,20:8,21:8,22:8>>)), + {'EXIT',_} = (catch base64:decode_to_string([19,20,21,22])), ok. %%------------------------------------------------------------------------- %% mime_decode and mime_decode_to_string have different implementations -- cgit v1.2.3 From 7530b72480eb68cc9a5d700ee7f22c5069b61514 Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Thu, 30 Nov 2017 16:09:26 +0100 Subject: stdlib: Add base64 benchmarks --- lib/stdlib/test/stdlib_bench_SUITE.erl | 107 ++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 3 deletions(-) (limited to 'lib/stdlib/test') diff --git a/lib/stdlib/test/stdlib_bench_SUITE.erl b/lib/stdlib/test/stdlib_bench_SUITE.erl index 8670e7029c..2a9981bb9e 100644 --- a/lib/stdlib/test/stdlib_bench_SUITE.erl +++ b/lib/stdlib/test/stdlib_bench_SUITE.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2012-2016. All Rights Reserved. +%% Copyright Ericsson AB 2012-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -28,13 +28,20 @@ suite() -> [{ct_hooks,[{ts_install_cth,[{nodenames,2}]}]}]. all() -> - [{group,unicode}]. + [{group,unicode}, {group,base64}]. groups() -> [{unicode,[{repeat,5}], [norm_nfc_list, norm_nfc_deep_l, norm_nfc_binary, string_lexemes_list, string_lexemes_binary - ]}]. + ]}, + {base64,[{repeat,5}], + [decode_binary, decode_binary_to_string, + decode_list, decode_list_to_string, + encode_binary, encode_binary_to_string, + encode_list, encode_list_to_string, + mime_binary_decode, mime_binary_decode_to_string, + mime_list_decode, mime_list_decode_to_string]}]. init_per_group(_GroupName, Config) -> Config. @@ -105,3 +112,97 @@ norm_data(Config) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +decode_binary(_Config) -> + test(decode, encoded_binary()). + +decode_binary_to_string(_Config) -> + test(decode_to_string, encoded_binary()). + +decode_list(_Config) -> + test(decode, encoded_list()). + +decode_list_to_string(_Config) -> + test(decode_to_string, encoded_list()). + +encode_binary(_Config) -> + test(encode, binary()). + +encode_binary_to_string(_Config) -> + test(encode_to_string, binary()). + +encode_list(_Config) -> + test(encode, list()). + +encode_list_to_string(_Config) -> + test(encode_to_string, list()). + +mime_binary_decode(_Config) -> + test(mime_decode, encoded_binary()). + +mime_binary_decode_to_string(_Config) -> + test(mime_decode_to_string, encoded_binary()). + +mime_list_decode(_Config) -> + test(mime_decode, encoded_list()). + +mime_list_decode_to_string(_Config) -> + test(mime_decode_to_string, encoded_list()). + +-define(SIZE, 10000). +-define(N, 1000). + +encoded_binary() -> + list_to_binary(encoded_list()). + +encoded_list() -> + L = random_byte_list(round(?SIZE*0.75)), + base64:encode_to_string(L). + +binary() -> + list_to_binary(list()). + +list() -> + random_byte_list(?SIZE). + +test(Func, Data) -> + F = fun() -> loop(?N, Func, Data) end, + {Time, ok} = timer:tc(fun() -> lspawn(F) end), + report_base64(Time). + +loop(0, _F, _D) -> garbage_collect(), ok; +loop(N, F, D) -> + _ = base64:F(D), + loop(N - 1, F, D). + +lspawn(Fun) -> + {Pid, Ref} = spawn_monitor(fun() -> exit(Fun()) end), + receive + {'DOWN', Ref, process, Pid, Rep} -> Rep + end. + +report_base64(Time) -> + Tps = round((?N*1000000)/Time), + ct_event:notify(#event{name = benchmark_data, + data = [{suite, "stdlib_base64"}, + {value, Tps}]}), + Tps. + +%% Copied from base64_SUITE.erl. + +random_byte_list(N) -> + random_byte_list(N, []). + +random_byte_list(0, Acc) -> + Acc; +random_byte_list(N, Acc) -> + random_byte_list(N-1, [rand:uniform(255)|Acc]). + +make_big_binary(N) -> + list_to_binary(mbb(N, [])). + +mbb(N, Acc) when N > 256 -> + B = list_to_binary(lists:seq(0, 255)), + mbb(N - 256, [B | Acc]); +mbb(N, Acc) -> + B = list_to_binary(lists:seq(0, N-1)), + lists:reverse(Acc, B). -- cgit v1.2.3