diff options
-rw-r--r-- | erts/emulator/beam/erl_bif_binary.c | 69 | ||||
-rw-r--r-- | lib/stdlib/test/stdlib.spec | 2 | ||||
-rw-r--r-- | lib/stdlib/test/stdlib_bench_SUITE.erl | 67 |
3 files changed, 106 insertions, 32 deletions
diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index 469f6a1ea8..245e04cb21 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -796,6 +796,7 @@ static void compute_goodshifts(BMData *bmd) } #define BM_LOOP_FACTOR 10 /* Should we have a higher value? */ +#define MC_LOOP_FACTOR 8 static void bm_init_find_first_match(BinaryFindContext *ctx) { @@ -819,13 +820,38 @@ static BFReturn bm_find_first_match(BinaryFindContext *ctx, byte *haystack) Sint i; Sint j = state->pos; register Uint reds = *reductions; + byte *pos_pointer; + Sint needle_last = blen - 1; + Sint mem_read = len - needle_last - j; - while (j <= len - blen) { + if (mem_read <= 0) { + return BF_NOT_FOUND; + } + mem_read = MIN(mem_read, reds * MC_LOOP_FACTOR); + ASSERT(mem_read > 0); + + pos_pointer = memchr(&haystack[j + needle_last], needle[needle_last], mem_read); + if (pos_pointer == NULL) { + reds -= mem_read / MC_LOOP_FACTOR; + j += mem_read; + } else { + reds -= (pos_pointer - &haystack[j]) / MC_LOOP_FACTOR; + j = pos_pointer - haystack - needle_last; + } + + // Ensure we have at least one reduction before entering the loop + ++reds; + + for(;;) { + if (j > len - blen) { + *reductions = reds; + return BF_NOT_FOUND; + } if (--reds == 0) { state->pos = j; return BF_RESTART; } - for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) + for (i = needle_last; i >= 0 && needle[i] == haystack[i + j]; --i) ; if (i < 0) { /* found */ *reductions = reds; @@ -835,8 +861,6 @@ static BFReturn bm_find_first_match(BinaryFindContext *ctx, byte *haystack) } j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); } - *reductions = reds; - return BF_NOT_FOUND; } static void bm_init_find_all(BinaryFindContext *ctx) @@ -875,14 +899,38 @@ static BFReturn bm_find_all_non_overlapping(BinaryFindContext *ctx, byte *haysta Sint *gs = bmd->goodshift; Sint *bs = bmd->badshift; byte *needle = bmd->x; - Sint i; + Sint i = -1; /* Use memchr on start and on every match */ Sint j = state->pos; Uint m = state->m; Uint allocated = state->allocated; FindallData *out = state->out; register Uint reds = *reductions; + byte *pos_pointer; + Sint needle_last = blen - 1; + Sint mem_read; - while (j <= len - blen) { + for(;;) { + if (i < 0) { + mem_read = len - needle_last - j; + if(mem_read <= 0) { + goto done; + } + mem_read = MIN(mem_read, reds * MC_LOOP_FACTOR); + ASSERT(mem_read > 0); + pos_pointer = memchr(&haystack[j + needle_last], needle[needle_last], mem_read); + if (pos_pointer == NULL) { + reds -= mem_read / MC_LOOP_FACTOR; + j += mem_read; + } else { + reds -= (pos_pointer - &haystack[j]) / MC_LOOP_FACTOR; + j = pos_pointer - haystack - needle_last; + } + // Ensure we have at least one reduction when resuming the loop + ++reds; + } + if (j > len - blen) { + goto done; + } if (--reds == 0) { state->pos = j; state->m = m; @@ -890,7 +938,7 @@ static BFReturn bm_find_all_non_overlapping(BinaryFindContext *ctx, byte *haysta state->out = out; return BF_RESTART; } - for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) + for (i = needle_last; i >= 0 && needle[i] == haystack[i + j]; --i) ; if (i < 0) { /* found */ if (m >= allocated) { @@ -912,6 +960,7 @@ static BFReturn bm_find_all_non_overlapping(BinaryFindContext *ctx, byte *haysta j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); } } + done: state->m = m; state->out = out; *reductions = reds; @@ -931,6 +980,7 @@ static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp) Eterm t, b, comp_term = NIL; Uint characters; Uint words; + Uint size; characters = 0; words = 0; @@ -946,11 +996,12 @@ static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp) if (binary_bitsize(b) != 0) { goto badarg; } - if (binary_size(b) == 0) { + size = binary_size(b); + if (size == 0) { goto badarg; } ++words; - characters += binary_size(b); + characters += size; } if (is_not_nil(t)) { goto badarg; diff --git a/lib/stdlib/test/stdlib.spec b/lib/stdlib/test/stdlib.spec index 9c625091a8..4de7c1a0eb 100644 --- a/lib/stdlib/test/stdlib.spec +++ b/lib/stdlib/test/stdlib.spec @@ -1,4 +1,4 @@ {suites,"../stdlib_test",all}. {skip_groups,"../stdlib_test",stdlib_bench_SUITE, - [base64,gen_server,gen_statem,unicode], + [binary,base64,gen_server,gen_statem,unicode], "Benchmark only"}. diff --git a/lib/stdlib/test/stdlib_bench_SUITE.erl b/lib/stdlib/test/stdlib_bench_SUITE.erl index 2364e8376f..b937eeb06a 100644 --- a/lib/stdlib/test/stdlib_bench_SUITE.erl +++ b/lib/stdlib/test/stdlib_bench_SUITE.erl @@ -29,7 +29,7 @@ suite() -> [{ct_hooks,[{ts_install_cth,[{nodenames,2}]}]}]. all() -> - [{group,unicode},{group,base64}, + [{group,unicode},{group,base64},{group,binary}, {group,gen_server},{group,gen_statem}, {group,gen_server_comparison},{group,gen_statem_comparison}]. @@ -38,6 +38,11 @@ groups() -> [norm_nfc_list, norm_nfc_deep_l, norm_nfc_binary, string_lexemes_list, string_lexemes_binary ]}, + {binary, [{repeat, 5}], + [match_single_pattern_no_match, + matches_single_pattern_no_match, + matches_single_pattern_eventual_match, + matches_single_pattern_frequent_match]}, {base64,[{repeat,5}], [decode_binary, decode_binary_to_string, decode_list, decode_list_to_string, @@ -157,41 +162,59 @@ norm_data(Config) -> %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +match_single_pattern_no_match(_Config) -> + Binary = binary:copy(<<"ugbcfuysabfuqyfikgfsdalpaskfhgjsdgfjwsalp">>, 1000000), + comment(test(binary, match, [Binary, <<"o">>])). + +matches_single_pattern_no_match(_Config) -> + Binary = binary:copy(<<"ugbcfuysabfuqyfikgfsdalpaskfhgjsdgfjwsalp">>, 1000000), + comment(test(binary, matches, [Binary, <<"o">>])). + +matches_single_pattern_eventual_match(_Config) -> + Binary = binary:copy(<<"ugbcfuysabfuqyfikgfsdalpaskfhgjsdgfjwsal\n">>, 1000000), + comment(test(binary, matches, [Binary, <<"\n">>])). + +matches_single_pattern_frequent_match(_Config) -> + Binary = binary:copy(<<"abc\n">>, 1000000), + comment(test(binary, matches, [Binary, <<"abc">>])). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + decode_binary(_Config) -> - comment(test(decode, encoded_binary())). + comment(test(base64, decode, [encoded_binary()])). decode_binary_to_string(_Config) -> - comment(test(decode_to_string, encoded_binary())). + comment(test(base64, decode_to_string, [encoded_binary()])). decode_list(_Config) -> - comment(test(decode, encoded_list())). + comment(test(base64, decode, [encoded_list()])). decode_list_to_string(_Config) -> - comment(test(decode_to_string, encoded_list())). + comment(test(base64, decode_to_string, [encoded_list()])). encode_binary(_Config) -> - comment(test(encode, binary())). + comment(test(base64, encode, [binary()])). encode_binary_to_string(_Config) -> - comment(test(encode_to_string, binary())). + comment(test(base64, encode_to_string, [binary()])). encode_list(_Config) -> - comment(test(encode, list())). + comment(test(base64, encode, [list()])). encode_list_to_string(_Config) -> - comment(test(encode_to_string, list())). + comment(test(base64, encode_to_string, [list()])). mime_binary_decode(_Config) -> - comment(test(mime_decode, encoded_binary())). + comment(test(base64, mime_decode, [encoded_binary()])). mime_binary_decode_to_string(_Config) -> - comment(test(mime_decode_to_string, encoded_binary())). + comment(test(base64, mime_decode_to_string, [encoded_binary()])). mime_list_decode(_Config) -> - comment(test(mime_decode, encoded_list())). + comment(test(base64, mime_decode, [encoded_list()])). mime_list_decode_to_string(_Config) -> - comment(test(mime_decode_to_string, encoded_list())). + comment(test(base64, mime_decode_to_string, [encoded_list()])). -define(SIZE, 10000). -define(N, 1000). @@ -209,15 +232,15 @@ binary() -> list() -> random_byte_list(?SIZE). -test(Func, Data) -> - F = fun() -> loop(?N, Func, Data) end, +test(Mod, Fun, Args) -> + F = fun() -> loop(?N, Mod, Fun, Args) end, {Time, ok} = timer:tc(fun() -> lspawn(F) end), - report_base64(Time). + report_mfa(Time, Mod). -loop(0, _F, _D) -> garbage_collect(), ok; -loop(N, F, D) -> - _ = base64:F(D), - loop(N - 1, F, D). +loop(0, _M, _F, _A) -> garbage_collect(), ok; +loop(N, M, F, A) -> + _ = apply(M, F, A), + loop(N - 1, M, F, A). lspawn(Fun) -> {Pid, Ref} = spawn_monitor(fun() -> exit(Fun()) end), @@ -225,10 +248,10 @@ lspawn(Fun) -> {'DOWN', Ref, process, Pid, Rep} -> Rep end. -report_base64(Time) -> +report_mfa(Time, Mod) -> Tps = round((?N*1000000)/Time), ct_event:notify(#event{name = benchmark_data, - data = [{suite, "stdlib_base64"}, + data = [{suite, "stdlib_" ++ atom_to_list(Mod)}, {value, Tps}]}), Tps. |