From 1dad48ee9f2e1aba6a0ec69d9cf688705d6f187c Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 29 Apr 2010 20:06:55 +0200 Subject: Add binary:list_to_bin/1 and binary:copy/1,2 Add testcases for binary:list_to_bin/1 and binary:copy/1,2. Add reference implementation of list_to_bin/1. --- erts/emulator/beam/atom.names | 1 + erts/emulator/beam/bif.tab | 6 +- erts/emulator/beam/binary.c | 52 +++---- erts/emulator/beam/erl_bif_binary.c | 257 ++++++++++++++++++++++++++++++++ erts/emulator/beam/erl_binary.h | 7 + lib/stdlib/src/binary.erl | 2 +- lib/stdlib/test/binary_module_SUITE.erl | 63 +++++++- lib/stdlib/test/binref.erl | 13 +- 8 files changed, 362 insertions(+), 39 deletions(-) diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 0117e14816..1138c0c871 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -102,6 +102,7 @@ atom big atom bif_return_trap atom binary atom binary_bin_to_list_trap +atom binary_copy_trap atom binary_longest_prefix_trap atom binary_longest_suffix_trap atom binary_match_trap diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 4d79856e43..bd908566ee 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -775,9 +775,9 @@ bif binary:part/3 bif binary:bin_to_list/1 bif binary:bin_to_list/2 bif binary:bin_to_list/3 -# bif binary:list_to_bin/1 -# bif binary:copy/1 -# bif binary:copy/2 +bif binary:list_to_bin/1 +bif binary:copy/1 +bif binary:copy/2 # bif binary:referenced_byte_size/1 # bif binary:decode_unsigned/1 # bif binary:decode_unsigned/2 diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 9c4076c8ff..c68392fad4 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -346,29 +346,40 @@ BIF_RETTYPE bitstring_to_list_1(BIF_ALIST_1) /* Turn a possibly deep list of ints (and binaries) into */ /* One large binary object */ -BIF_RETTYPE list_to_binary_1(BIF_ALIST_1) +/* + * This bif also exists in the binary module, under the name + * binary:list_to_bin/1, why it's divided into interface and + * implementation. Also the backend for iolist_to_binary_1. + */ + +BIF_RETTYPE erts_list_to_binary_bif(Process *p, Eterm arg) { Eterm bin; int i; int offset; byte* bytes; - if (is_nil(BIF_ARG_1)) { - BIF_RET(new_binary(BIF_P,(byte*)"",0)); + if (is_nil(arg)) { + BIF_RET(new_binary(p,(byte*)"",0)); } - if (is_not_list(BIF_ARG_1)) { + if (is_not_list(arg)) { goto error; } - if ((i = io_list_len(BIF_ARG_1)) < 0) { + if ((i = io_list_len(arg)) < 0) { goto error; } - bin = new_binary(BIF_P, (byte *)NULL, i); + bin = new_binary(p, (byte *)NULL, i); bytes = binary_bytes(bin); - offset = io_list_to_buf(BIF_ARG_1, (char*) bytes, i); + offset = io_list_to_buf(arg, (char*) bytes, i); ASSERT(offset == 0); BIF_RET(bin); - error: - BIF_ERROR(BIF_P, BADARG); + error: + BIF_ERROR(p, BADARG); +} + +BIF_RETTYPE list_to_binary_1(BIF_ALIST_1) +{ + return erts_list_to_binary_bif(BIF_P, BIF_ARG_1); } /* Turn a possibly deep list of ints (and binaries) into */ @@ -376,31 +387,10 @@ BIF_RETTYPE list_to_binary_1(BIF_ALIST_1) BIF_RETTYPE iolist_to_binary_1(BIF_ALIST_1) { - Eterm bin; - int i; - int offset; - byte* bytes; - if (is_binary(BIF_ARG_1)) { BIF_RET(BIF_ARG_1); } - if (is_nil(BIF_ARG_1)) { - BIF_RET(new_binary(BIF_P,(byte*)"",0)); - } - if (is_not_list(BIF_ARG_1)) { - goto error; - } - if ((i = io_list_len(BIF_ARG_1)) < 0) { - goto error; - } - bin = new_binary(BIF_P, (byte *)NULL, i); - bytes = binary_bytes(bin); - offset = io_list_to_buf(BIF_ARG_1, (char*) bytes, i); - ASSERT(offset == 0); - BIF_RET(bin); - - error: - BIF_ERROR(BIF_P, BADARG); + return erts_list_to_binary_bif(BIF_P, BIF_ARG_1); } BIF_RETTYPE list_to_bitstring_1(BIF_ALIST_1) diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index 0a9454f6bc..e2d3d00db2 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -61,6 +61,8 @@ static Export binary_longest_suffix_trap_export; static BIF_RETTYPE binary_longest_suffix_trap(BIF_ALIST_3); static Export binary_bin_to_list_trap_export; static BIF_RETTYPE binary_bin_to_list_trap(BIF_ALIST_3); +static Export binary_copy_trap_export; +static BIF_RETTYPE binary_copy_trap(BIF_ALIST_2); static Uint max_loop_limit; @@ -105,6 +107,13 @@ void erts_init_bif_binary(void) binary_bin_to_list_trap_export.code[2] = 3; binary_bin_to_list_trap_export.code[3] = (BeamInstr) em_apply_bif; binary_bin_to_list_trap_export.code[4] = (BeamInstr) &binary_bin_to_list_trap; + sys_memset((void *) &binary_copy_trap_export, 0, sizeof(Export)); + binary_copy_trap_export.address = &binary_copy_trap_export.code[3]; + binary_copy_trap_export.code[0] = am_erlang; + binary_copy_trap_export.code[1] = am_binary_copy_trap; + binary_copy_trap_export.code[2] = 2; + binary_copy_trap_export.code[3] = (BeamInstr) em_apply_bif; + binary_copy_trap_export.code[4] = (BeamInstr) &binary_copy_trap; max_loop_limit = 0; return; @@ -121,6 +130,7 @@ Sint erts_binary_set_loop_limit(Sint limit) } else { max_loop_limit = (Uint) limit; } + return save; } @@ -131,6 +141,9 @@ static Uint get_reds(Process *p, int loop_factor) if (tmp != 0 && tmp < reds) { return tmp; } + if (!reds) { + reds = 1; + } return reds; } @@ -2161,6 +2174,250 @@ BIF_RETTYPE binary_bin_to_list_1(BIF_ALIST_1) BIF_ERROR(BIF_P,BADARG); } +/* + * Ok, erlang:list_to_binary does not interrupt, and we really don't want + * an alternative implementation for the exact same thing, why we + * have descided to use the old non-restarting implementation for now. + * In reality, there is seldom many iterations involved in doing this, so the + * problem of long-running-bif's is not really that big in this case. + * So, for now we use the old implementation also in the module binary. + */ + +BIF_RETTYPE binary_list_to_bin_1(BIF_ALIST_1) +{ + return erts_list_to_binary_bif(BIF_P, BIF_ARG_1); +} + +typedef struct { + Uint times_left; + Uint source_size; + int source_type; + byte *source; + byte *temp_alloc; + Uint result_pos; + Binary *result; +} CopyBinState; + +#define BC_TYPE_EMPTY 0 +#define BC_TYPE_HEAP 1 +#define BC_TYPE_ALIGNED 2 /* May or may not point to (emasculated) binary, temp_alloc field is set + so that erts_free_aligned_binary_bytes_extra can handle either */ + + +#define BINARY_COPY_LOOP_FACTOR 100 + +static void cleanup_copy_bin_state(Binary *bp) +{ + CopyBinState *cbs = (CopyBinState *) ERTS_MAGIC_BIN_DATA(bp); + if (cbs->result != NULL) { + erts_bin_free(cbs->result); + cbs->result = NULL; + } + switch (cbs->source_type) { + case BC_TYPE_HEAP: + erts_free(ERTS_ALC_T_BINARY_BUFFER,cbs->source); + break; + case BC_TYPE_ALIGNED: + erts_free_aligned_binary_bytes_extra(cbs->temp_alloc, + ERTS_ALC_T_BINARY_BUFFER); + break; + default: + /* otherwise do nothing */ + break; + } + cbs->source_type = BC_TYPE_EMPTY; +} + +/* + * Binary *erts_bin_nrml_alloc(Uint size); + * Binary *erts_bin_realloc(Binary *bp, Uint size); + * void erts_bin_free(Binary *bp); + */ +static BIF_RETTYPE do_binary_copy(Process *p, Eterm bin, Eterm en) +{ + Uint n; + byte *bytes; + Uint bit_offs; + Uint bit_size; + size_t size; + Uint reds = get_reds(p, BINARY_COPY_LOOP_FACTOR); + Uint target_size; + byte *t; + Uint pos; + + + if (is_not_binary(bin)) { + goto badarg; + } + if (!term_to_Uint(en, &n)) { + goto badarg; + } + if (!n) { + goto badarg; + } + ERTS_GET_BINARY_BYTES(bin,bytes,bit_offs,bit_size); + if (bit_size != 0) { + goto badarg; + } + + size = binary_size(bin); + target_size = size * n; + + if ((target_size - size) >= reds) { + Eterm orig; + Uint offset; + Uint bit_offset; + Uint bit_size; + CopyBinState *cbs; + Eterm *hp; + Eterm trap_term; + int i; + + /* We will trap, set up the structure for trapping right away */ + Binary *mb = erts_create_magic_binary(sizeof(CopyBinState), + cleanup_copy_bin_state); + cbs = ERTS_MAGIC_BIN_DATA(mb); + + cbs->temp_alloc = NULL; + cbs->source = NULL; + + ERTS_GET_REAL_BIN(bin, orig, offset, bit_offset, bit_size); + if (*(binary_val(orig)) == HEADER_PROC_BIN) { + ProcBin* pb = (ProcBin *) binary_val(orig); + if (pb->flags) { + erts_emasculate_writable_binary(pb); + } + cbs->source = + erts_get_aligned_binary_bytes_extra(bin, + &(cbs->temp_alloc), + ERTS_ALC_T_BINARY_BUFFER, + 0); + cbs->source_type = BC_TYPE_ALIGNED; + } else { /* Heap binary */ + cbs->source = + erts_get_aligned_binary_bytes_extra(bin, + &(cbs->temp_alloc), + ERTS_ALC_T_BINARY_BUFFER, + 0); + if (!(cbs->temp_alloc)) { /* alignment not needed, need to copy */ + byte *tmp = erts_alloc(ERTS_ALC_T_BINARY_BUFFER,size); + memcpy(tmp,cbs->source,size); + cbs->source = tmp; + cbs->source_type = BC_TYPE_HEAP; + } else { + cbs->source_type = BC_TYPE_ALIGNED; + } + } + cbs->result = erts_bin_nrml_alloc(target_size); /* Always offheap + if trapping */ + cbs->result->flags = 0; + cbs->result->orig_size = target_size; + erts_refc_init(&(cbs->result->refc), 1); + t = (byte *) cbs->result->orig_bytes; /* No offset or anything */ + pos = 0; + i = 0; + while (pos < reds) { + memcpy(t+pos,cbs->source, size); + pos += size; + ++i; + } + cbs->source_size = size; + cbs->result_pos = pos; + cbs->times_left = n-i; + hp = HAlloc(p,PROC_BIN_SIZE); + trap_term = erts_mk_magic_binary_term(&hp, &MSO(p), mb); + BUMP_ALL_REDS(p); + BIF_TRAP2(&binary_copy_trap_export, p, bin, trap_term); + } else { + Eterm res_term; + byte *temp_alloc = NULL; + byte *source = + erts_get_aligned_binary_bytes(bin, + &temp_alloc); + if (target_size <= ERL_ONHEAP_BIN_LIMIT) { + res_term = erts_new_heap_binary(p,NULL,target_size,&t); + } else { + res_term = erts_new_mso_binary(p,NULL,target_size); + t = ((ProcBin *) binary_val(res_term))->bytes; + } + pos = 0; + while (pos < target_size) { + memcpy(t+pos,source, size); + pos += size; + } + erts_free_aligned_binary_bytes(temp_alloc); + BUMP_REDS(p,pos / BINARY_COPY_LOOP_FACTOR); + BIF_RET(res_term); + } + badarg: + BIF_ERROR(p,BADARG); +} + +BIF_RETTYPE binary_copy_trap(BIF_ALIST_2) +{ + Uint n; + size_t size; + Uint reds = get_reds(BIF_P, BINARY_COPY_LOOP_FACTOR); + byte *t; + Uint pos; + Binary *mb = ((ProcBin *) binary_val(BIF_ARG_2))->val; + CopyBinState *cbs = (CopyBinState *) ERTS_MAGIC_BIN_DATA(mb); + Uint opos; + + /* swapout... */ + n = cbs->times_left; + size = cbs->source_size; + opos = pos = cbs->result_pos; + t = (byte *) cbs->result->orig_bytes; /* "well behaved" binary */ + if ((n-1) * size >= reds) { + Uint i = 0; + while ((pos - opos) < reds) { + memcpy(t+pos,cbs->source, size); + pos += size; + ++i; + } + cbs->result_pos = pos; + cbs->times_left -= i; + BUMP_ALL_REDS(BIF_P); + BIF_TRAP2(&binary_copy_trap_export, BIF_P, BIF_ARG_1, BIF_ARG_2); + } else { + Binary *save; + ProcBin* pb; + Uint target_size = cbs->result->orig_size; + while (pos < target_size) { + memcpy(t+pos,cbs->source, size); + pos += size; + } + save = cbs->result; + cbs->result = NULL; + cleanup_copy_bin_state(mb); /* now cbs is dead */ + pb = (ProcBin *) HAlloc(BIF_P, PROC_BIN_SIZE); + pb->thing_word = HEADER_PROC_BIN; + pb->size = target_size; + pb->next = MSO(BIF_P).mso; + MSO(BIF_P).mso = pb; + pb->val = save; + pb->bytes = t; + pb->flags = 0; + + MSO(BIF_P).overhead += target_size / sizeof(Eterm); + BUMP_REDS(BIF_P,(pos - opos) / BINARY_COPY_LOOP_FACTOR); + + BIF_RET(make_binary(pb)); + } +} + + +BIF_RETTYPE binary_copy_1(BIF_ALIST_1) +{ + return do_binary_copy(BIF_P,BIF_ARG_1,make_small(1)); +} + +BIF_RETTYPE binary_copy_2(BIF_ALIST_2) +{ + return do_binary_copy(BIF_P,BIF_ARG_1,BIF_ARG_2); +} + /* * Hard debug functions (dump) for the search structures */ diff --git a/erts/emulator/beam/erl_binary.h b/erts/emulator/beam/erl_binary.h index 74d7966ca0..aeeebf3c74 100644 --- a/erts/emulator/beam/erl_binary.h +++ b/erts/emulator/beam/erl_binary.h @@ -21,6 +21,7 @@ #define __ERL_BINARY_H #include "erl_threads.h" +#include "bif.h" /* * Maximum number of bytes to place in a heap binary. @@ -152,6 +153,12 @@ void erts_init_binary(void); byte* erts_get_aligned_binary_bytes_extra(Eterm, byte**, ErtsAlcType_t, unsigned extra); +/* + * Common implementation for erlang:list_to_binary/1 and binary:list_to_bin/1 + */ + +BIF_RETTYPE erts_list_to_binary_bif(Process *p, Eterm arg); + #if defined(__i386__) || !defined(__GNUC__) /* * Doubles aren't required to be 8-byte aligned on intel x86. diff --git a/lib/stdlib/src/binary.erl b/lib/stdlib/src/binary.erl index ff10a24f66..61b7aacf49 100644 --- a/lib/stdlib/src/binary.erl +++ b/lib/stdlib/src/binary.erl @@ -29,9 +29,9 @@ %% binary:at/2 %% binary:part/{2,3} %% binary:bin_to_list/{1,2,3} -%% - Not yet: %% binary:list_to_bin/1 %% binary:copy/{1,2} +%% - Not yet: %% binary:referenced_byte_size/1 %% binary:decode_unsigned/{1,2} %% diff --git a/lib/stdlib/test/binary_module_SUITE.erl b/lib/stdlib/test/binary_module_SUITE.erl index 00fb87b76f..1c386b049c 100644 --- a/lib/stdlib/test/binary_module_SUITE.erl +++ b/lib/stdlib/test/binary_module_SUITE.erl @@ -1,7 +1,9 @@ -module(binary_module_SUITE). -export([all/1, interesting/1,random_ref_comp/1,random_ref_sr_comp/1, - random_ref_fla_comp/1,parts/1, bin_to_list/1]). + random_ref_fla_comp/1,parts/1, bin_to_list/1, list_to_bin/1, copy/1]). + +-export([random_copy/1]). -define(STANDALONE,1). @@ -26,7 +28,7 @@ run() -> -endif. all(suite) -> [interesting,random_ref_fla_comp,random_ref_sr_comp, - random_ref_comp,parts,bin_to_list]. + random_ref_comp,parts,bin_to_list, list_to_bin, copy]. -define(MASK_ERROR(EXPR),mask_error((catch (EXPR)))). @@ -290,6 +292,62 @@ do_interesting(Module) -> ?line [1,2,3] = ?MASK_ERROR(Module:bin_to_list(<<1,2,3>>,3,-3)), ok. +list_to_bin(doc) -> + ["Test list_to_bin/1 bif"]; +list_to_bin(Config) when is_list(Config) -> + %% Just some smoke_tests first, then go nuts with random cases + ?line badarg = ?MASK_ERROR(binary:list_to_bin({})), + ?line badarg = ?MASK_ERROR(binary:list_to_bin(apa)), + ?line badarg = ?MASK_ERROR(binary:list_to_bin(<<"apa">>)), + F1 = fun(L) -> + ?MASK_ERROR(binref:list_to_bin(L)) + end, + F2 = fun(L) -> + ?MASK_ERROR(binary:list_to_bin(L)) + end, + ?line random_iolist:run(1000,F1,F2), + ok. + +copy(doc) -> + ["Test copy/1,2 bif's"]; +copy(Config) when is_list(Config) -> + ?line <<1,2,3>> = binary:copy(<<1,2,3>>), + ?line RS = random_string({1,10000}), + ?line RS = RS2 = binary:copy(RS), + ?line false = erts_debug:same(RS,RS2), + ?line badarg = ?MASK_ERROR(binary:copy(<<1,2,3>>,0)), + ?line badarg = ?MASK_ERROR(binary:copy(<<>>,0)), + ?line <<>> = binary:copy(<<>>,10000), + ?line random:seed({1271,769940,559934}), + ?line ok = random_copy(3000), + ?line erts_debug:set_internal_state(available_internal_state,true), + ?line io:format("oldlimit: ~p~n", + [erts_debug:set_internal_state(binary_loop_limit,10)]), + ?line ok = random_copy(1000), + ?line io:format("limit was: ~p~n", + [erts_debug:set_internal_state(binary_loop_limit, + default)]), + ?line erts_debug:set_internal_state(available_internal_state,false), + ok. + +random_copy(0) -> + ok; +random_copy(N) -> + Str = random_string({0,N}), + Num = random:uniform(N div 10+1), + A = ?MASK_ERROR(binary:copy(Str,Num)), + B = ?MASK_ERROR(binref:copy(Str,Num)), + C = ?MASK_ERROR(binary:copy(make_unaligned(Str),Num)), + case {(A =:= B), (B =:= C)} of + {true,true} -> + random_copy(N-1); + _ -> + io:format("Failed to pick copy ~s ~p times~n", + [Str,Num]), + io:format("A:~p,~nB:~p,~n,C:~p.~n", + [A,B,C]), + exit(mismatch) + end. bin_to_list(doc) -> ["Test bin_to_list/1,2,3 bif's"]; @@ -426,7 +484,6 @@ random_ref_comp(Config) when is_list(Config) -> ?line do_random_matches_comp3(5,{1,40},{30,1000}), ?line io:format("limit was: ~p~n",[ erts_debug:set_internal_state(binary_loop_limit,default)]), ?line erts_debug:set_internal_state(available_internal_state,false), - ?line put(success_counter,0), ok. random_ref_sr_comp(doc) -> diff --git a/lib/stdlib/test/binref.erl b/lib/stdlib/test/binref.erl index e09a0f2743..2322d036a2 100644 --- a/lib/stdlib/test/binref.erl +++ b/lib/stdlib/test/binref.erl @@ -4,7 +4,8 @@ split/2,split/3,replace/3,replace/4,first/1,last/1,at/2, part/2,part/3,copy/1,copy/2,encode_unsigned/1,encode_unsigned/2, decode_unsigned/1,decode_unsigned/2,referenced_byte_size/1, - longest_common_prefix/1,longest_common_suffix/1,bin_to_list/1, bin_to_list/2, bin_to_list/3 ]). + longest_common_prefix/1,longest_common_suffix/1,bin_to_list/1, + bin_to_list/2,bin_to_list/3,list_to_bin/1]). %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -351,6 +352,16 @@ bin_to_list(Subject,A,B) -> _:_ -> erlang:error(badarg) end. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% list_to_bin +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +list_to_bin(List) -> + try + erlang:list_to_binary(List) + catch + _:_ -> + erlang:error(badarg) + end. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% longest_common_prefix -- cgit v1.2.3