From b93e9b611056828ac2c82f225960aa29348ebe97 Mon Sep 17 00:00:00 2001 From: Andrew Bennett Date: Wed, 10 Jun 2015 13:48:30 -0600 Subject: stdlib: Add BIF binary:split/2 and binary:split/3 --- erts/emulator/beam/atom.names | 2 + erts/emulator/beam/bif.tab | 7 + erts/emulator/beam/erl_bif_binary.c | 697 ++++++++++++++++++++++++++++++++++++ lib/stdlib/src/binary.erl | 77 +--- 4 files changed, 714 insertions(+), 69 deletions(-) diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index f9a2f3e33e..3d357886ee 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -121,6 +121,7 @@ atom binary_longest_prefix_trap atom binary_longest_suffix_trap atom binary_match_trap atom binary_matches_trap +atom binary_split_trap atom binary_to_list_continue atom binary_to_term_trap atom block @@ -584,6 +585,7 @@ atom trace trace_ts traced atom trace_control_word atom tracer atom trap_exit +atom trim atom try_clause atom true atom tuple diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 4f0656d174..65f8d6f1f5 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -643,3 +643,10 @@ bif erts_debug:map_info/1 # bif erlang:hash/2 + +# +# New in 19.0 +# + +bif binary:split/2 +bif binary:split/3 diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index 134aa2d396..68e5fe23c7 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -67,12 +67,16 @@ static Export binary_bin_to_list_trap_export; static BIF_RETTYPE binary_bin_to_list_trap(BIF_ALIST_3); static Export binary_copy_trap_export; static BIF_RETTYPE binary_copy_trap(BIF_ALIST_2); +static Export binary_split_trap_export; +static BIF_RETTYPE binary_split_trap(BIF_ALIST_3); static Uint max_loop_limit; static BIF_RETTYPE binary_match(Process *p, Eterm arg1, Eterm arg2, Eterm arg3); static BIF_RETTYPE binary_matches(Process *p, Eterm arg1, Eterm arg2, Eterm arg3); +static 
BIF_RETTYPE +binary_split(Process *p, Eterm arg1, Eterm arg2, Eterm arg3); void erts_init_bif_binary(void) { @@ -100,6 +104,10 @@ void erts_init_bif_binary(void) am_erlang, am_binary_copy_trap, 2, &binary_copy_trap); + erts_init_trap_export(&binary_split_trap_export, + am_erlang, am_binary_split_trap, 3, + &binary_split_trap); + max_loop_limit = 0; return; } @@ -2534,6 +2542,695 @@ BIF_RETTYPE binary_copy_2(BIF_ALIST_2) return do_binary_copy(BIF_P,BIF_ARG_1,BIF_ARG_2); } +#define BINARY_SPLIT_GLOBAL 0x01 +#define BINARY_SPLIT_TRIM 0x02 + +static int do_binary_split(Process *p, Eterm subject, Uint hsstart, + Uint hsend, Uint hsflags, Eterm type, Binary *bin, + Eterm state_term, Eterm *res_term) +{ + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + + ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); + } + if (state_term != NIL) { + Eterm *ptr = big_val(state_term); + type = ptr[1]; + hsflags = (Uint)(ptr[2]); + } + + if (hsflags & BINARY_SPLIT_GLOBAL) { + if (type == am_bm) { + BMData *bm; + Sint pos; + Eterm ret; + Eterm *hp; + BMFindAllState state; + Uint reds = get_reds(p, BM_LOOP_FACTOR); + Uint save_reds = reds; + + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_bm_data(bm); +#endif + if (state_term == NIL) { + bm_init_find_all(&state, hsstart, hsend); + } else { + Eterm *ptr = big_val(state_term); + bm_restore_find_all(&state, (char *)(ptr+3)); + } + + pos = bm_find_all_non_overlapping(&state, bm, bytes, &reds); + if (pos == BM_NOT_FOUND) { + hp = HAlloc(p, 2); + ret = NIL; + ret = CONS(hp, subject, ret); + } else if (pos == BM_RESTART) { + int x = + (SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + + !!(SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap bm!\n"); +#endif + hp = HAlloc(p, x+3); + hp[0] = make_pos_bignum_header(x+2); + 
hp[1] = type; + hp[2] = (Eterm)(hsflags); + bm_serialize_find_all(&state, (char *)(hp+3)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + bm_clean_find_all(&state); + return DO_BIN_MATCH_RESTART; + } else { + size_t orig_size; + Eterm orig; + Uint offset; + Uint bit_offset; + Uint bit_size; + ErlSubBin *sb; + FindallData *fad = state.out; + int i, j, k; + orig_size = binary_size(subject); + j = state.m - 1; + k = (int)(orig_size); + if ((hsflags & BINARY_SPLIT_TRIM) && (orig_size - fad[j].pos - fad[j].len) == 0) { + for (i = (j - 1); i >= 0; --i) { + if ((fad[i+1].pos - fad[i].pos - fad[i].len) != 0) { + break; + } + } + if (i == -1) { + if (fad[0].pos == 0) { + ret = NIL; + } else { + hp = HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = fad[0].pos; + sb->offs = offset; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = bit_size; + sb->is_writable = 0; + fad[0].epos = make_binary(sb); + hp += ERL_SUB_BIN_SIZE; + + ret = NIL; + ret = CONS(hp, make_binary(sb), ret); + hp += 2; + } + erts_free_aligned_binary_bytes(temp_alloc); + bm_clean_find_all(&state); + BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); + *res_term = ret; + return DO_BIN_MATCH_OK; + } + j = i; + k = fad[j+1].pos; + } + hp = HAlloc(p, (j + 2) * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = fad[0].pos; + sb->offs = offset; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = 0; + sb->is_writable = 0; + fad[0].epos = make_binary(sb); + hp += ERL_SUB_BIN_SIZE; + + for (i = 1; i <= j; ++i) { + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = fad[i].pos - fad[i-1].pos - fad[i-1].len; + sb->offs = offset + fad[i-1].pos + fad[i-1].len; + sb->orig = orig; + sb->bitoffs = bit_offset; + 
sb->bitsize = 0; + sb->is_writable = 0; + fad[i].epos = make_binary(sb); + hp += ERL_SUB_BIN_SIZE; + } + ret = NIL; + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = k - fad[j].pos - fad[j].len; + sb->offs = offset + fad[j].pos + fad[j].len; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = bit_size; + sb->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + ret = CONS(hp, make_binary(sb), ret); + hp += 2; + for (i = j; i >= 0; --i) { + ret = CONS(hp, fad[i].epos, ret); + hp += 2; + } + } + erts_free_aligned_binary_bytes(temp_alloc); + bm_clean_find_all(&state); + BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); + *res_term = ret; + return DO_BIN_MATCH_OK; + } else if (type == am_ac) { + ACTrie *act; + int acr; + ACFindAllState state; + Eterm ret; + Eterm *hp; + Uint reds = get_reds(p, AC_LOOP_FACTOR); + Uint save_reds = reds; + + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_ac_trie(act); +#endif + if (state_term == NIL) { + ac_init_find_all(&state, act, hsstart, hsend); + } else { + Eterm *ptr = big_val(state_term); + ac_restore_find_all(&state, (char *)(ptr+3)); + } + acr = ac_find_all_non_overlapping(&state, bytes, &reds); + if (acr == AC_NOT_FOUND) { + hp = HAlloc(p, 2); + ret = NIL; + ret = CONS(hp, subject, ret); + } else if (acr == AC_RESTART) { + int x = (SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + + !!(SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap ac!\n"); +#endif + hp = HAlloc(p, x+3); + hp[0] = make_pos_bignum_header(x+2); + hp[1] = type; + hp[2] = (Eterm)(hsflags); + ac_serialize_find_all(&state, (char *)(hp+3)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + ac_clean_find_all(&state); + return DO_BIN_MATCH_RESTART; + } else { + size_t orig_size; + Eterm orig; + Uint offset; + Uint bit_offset; + Uint bit_size; + ErlSubBin *sb; + FindallData *fad = state.out; + int i, j, k; + orig_size = 
binary_size(subject); + j = state.m - 1; + k = (int)(orig_size); + if ((hsflags & BINARY_SPLIT_TRIM) && (orig_size - fad[j].pos - fad[j].len) == 0) { + for (i = (j - 1); i >= 0; --i) { + if ((fad[i+1].pos - fad[i].pos - fad[i].len) != 0) { + break; + } + } + if (i == -1) { + if (fad[0].pos == 0) { + ret = NIL; + } else { + hp = HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = fad[0].pos; + sb->offs = offset; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = bit_size; + sb->is_writable = 0; + fad[0].epos = make_binary(sb); + hp += ERL_SUB_BIN_SIZE; + + ret = NIL; + ret = CONS(hp, make_binary(sb), ret); + hp += 2; + } + erts_free_aligned_binary_bytes(temp_alloc); + ac_clean_find_all(&state); + BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); + *res_term = ret; + return DO_BIN_MATCH_OK; + } + j = i; + k = fad[j+1].pos; + } + hp = HAlloc(p, (j + 2) * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = fad[0].pos; + sb->offs = offset; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = 0; + sb->is_writable = 0; + fad[0].epos = make_binary(sb); + hp += ERL_SUB_BIN_SIZE; + + for (i = 1; i <= j; ++i) { + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = fad[i].pos - fad[i-1].pos - fad[i-1].len; + sb->offs = offset + fad[i-1].pos + fad[i-1].len; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = 0; + sb->is_writable = 0; + fad[i].epos = make_binary(sb); + hp += ERL_SUB_BIN_SIZE; + } + ret = NIL; + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = k - fad[j].pos - fad[j].len; + sb->offs = offset + fad[j].pos + fad[j].len; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = bit_size; + sb->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + ret = 
CONS(hp, make_binary(sb), ret); + hp += 2; + for (i = j; i >= 0; --i) { + ret = CONS(hp, fad[i].epos, ret); + hp += 2; + } + } + erts_free_aligned_binary_bytes(temp_alloc); + ac_clean_find_all(&state); + BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); + *res_term = ret; + return DO_BIN_MATCH_OK; + } + } else { + if (type == am_bm) { + BMData *bm; + Sint pos; + Eterm ret; + Eterm *hp; + BMFindFirstState state; + Uint reds = get_reds(p, BM_LOOP_FACTOR); + Uint save_reds = reds; + + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_bm_data(bm); +#endif + if (state_term == NIL) { + bm_init_find_first_match(&state, hsstart, hsend); + } else { + Eterm *ptr = big_val(state_term); + memcpy((void *)(&state), (const void *)(ptr+3), sizeof(BMFindFirstState)); + } + +#ifdef HARDDEBUG + erts_printf("(bm) state->pos = %ld, state->len = %lu\n",state.pos, + state.len); +#endif + pos = bm_find_first_match(&state, bm, bytes, &reds); + if (pos == BM_NOT_FOUND) { + hp = HAlloc(p, 2); + ret = NIL; + ret = CONS(hp, subject, ret); + } else if (pos == BM_RESTART) { + int x = + (sizeof(state) / sizeof(Eterm)) + + !!(sizeof(state) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap bm!\n"); +#endif + hp = HAlloc(p, x+3); + hp[0] = make_pos_bignum_header(x+2); + hp[1] = type; + hp[2] = (Eterm)(hsflags); + memcpy((void *)(hp+3), (const void *)(&state), sizeof(state)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + return DO_BIN_MATCH_RESTART; + } else { + size_t orig_size; + Eterm orig; + Uint offset; + Uint bit_offset; + Uint bit_size; + ErlSubBin *sb1; + ErlSubBin *sb2; + + orig_size = binary_size(subject); + + if ((hsflags & BINARY_SPLIT_TRIM) && (orig_size - pos - bm->len) == 0) { + if (pos == 0) { + ret = NIL; + } else { + hp = HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb1 = (ErlSubBin *) hp; + sb1->thing_word = HEADER_SUB_BIN; + sb1->size = pos; + sb1->offs = offset; + 
sb1->orig = orig; + sb1->bitoffs = bit_offset; + sb1->bitsize = bit_size; + sb1->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + + ret = NIL; + ret = CONS(hp, make_binary(sb1), ret); + hp += 2; + } + } else { + hp = HAlloc(p, 2 * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb1 = (ErlSubBin *) hp; + sb1->thing_word = HEADER_SUB_BIN; + sb1->size = pos; + sb1->offs = offset; + sb1->orig = orig; + sb1->bitoffs = bit_offset; + sb1->bitsize = 0; + sb1->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + + sb2 = (ErlSubBin *) hp; + sb2->thing_word = HEADER_SUB_BIN; + sb2->size = orig_size - pos - bm->len; + sb2->offs = offset + pos + bm->len; + sb2->orig = orig; + sb2->bitoffs = bit_offset; + sb2->bitsize = bit_size; + sb2->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + + ret = NIL; + ret = CONS(hp, make_binary(sb2), ret); + hp += 2; + ret = CONS(hp, make_binary(sb1), ret); + hp += 2; + } + } + erts_free_aligned_binary_bytes(temp_alloc); + BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); + *res_term = ret; + return DO_BIN_MATCH_OK; + } else if (type == am_ac) { + ACTrie *act; + Uint pos, rlen; + int acr; + ACFindFirstState state; + Eterm ret; + Eterm *hp; + Uint reds = get_reds(p, AC_LOOP_FACTOR); + Uint save_reds = reds; + + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_ac_trie(act); +#endif + if (state_term == NIL) { + ac_init_find_first_match(&state, act, hsstart, hsend); + } else { + Eterm *ptr = big_val(state_term); + memcpy((void *)(&state), (const void *)(ptr+3), sizeof(ACFindFirstState)); + } + acr = ac_find_first_match(&state, bytes, &pos, &rlen, &reds); + if (acr == AC_NOT_FOUND) { + hp = HAlloc(p, 2); + ret = NIL; + ret = CONS(hp, subject, ret); + } else if (acr == AC_RESTART) { + int x = + (sizeof(state) / sizeof(Eterm)) + + !!(sizeof(state) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap ac!\n"); +#endif + hp = HAlloc(p, x+3); + hp[0] = make_pos_bignum_header(x+2); + hp[1] = type; + hp[2] = 
(Eterm)(hsflags); + memcpy((void *)(hp+3), (const void *)(&state), sizeof(state)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + return DO_BIN_MATCH_RESTART; + } else { + size_t orig_size; + Eterm orig; + Uint offset; + Uint bit_offset; + Uint bit_size; + ErlSubBin *sb1; + ErlSubBin *sb2; + + orig_size = binary_size(subject); + + if ((hsflags & BINARY_SPLIT_TRIM) && (orig_size - pos - rlen) == 0) { + if (pos == 0) { + ret = NIL; + } else { + hp = HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb1 = (ErlSubBin *) hp; + sb1->thing_word = HEADER_SUB_BIN; + sb1->size = pos; + sb1->offs = offset; + sb1->orig = orig; + sb1->bitoffs = bit_offset; + sb1->bitsize = bit_size; + sb1->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + + ret = NIL; + ret = CONS(hp, make_binary(sb1), ret); + hp += 2; + } + } else { + hp = HAlloc(p, 2 * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb1 = (ErlSubBin *) hp; + sb1->thing_word = HEADER_SUB_BIN; + sb1->size = pos; + sb1->offs = offset; + sb1->orig = orig; + sb1->bitoffs = bit_offset; + sb1->bitsize = 0; + sb1->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + + sb2 = (ErlSubBin *) hp; + sb2->thing_word = HEADER_SUB_BIN; + sb2->size = orig_size - pos - rlen; + sb2->offs = offset + pos + rlen; + sb2->orig = orig; + sb2->bitoffs = bit_offset; + sb2->bitsize = bit_size; + sb2->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + + ret = NIL; + ret = CONS(hp, make_binary(sb2), ret); + hp += 2; + ret = CONS(hp, make_binary(sb1), ret); + hp += 2; + } + } + erts_free_aligned_binary_bytes(temp_alloc); + BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); + *res_term = ret; + return DO_BIN_MATCH_OK; + } + } + badarg: + return DO_BIN_MATCH_BADARG; +} + +static int parse_split_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp, Uint *optp) +{ + Eterm *tp; + Uint pos; + Sint len; + *optp = 0; + *posp = 0; + *endp = binary_size(bin); + 
if (l == ((Eterm) 0) || l == NIL) { + return 0; + } else if (is_list(l)) { + while(is_list(l)) { + Eterm t = CAR(list_val(l)); + Uint orig_size; + if (is_atom(t)) { + if (t == am_global) { + *optp |= BINARY_SPLIT_GLOBAL; + l = CDR(list_val(l)); + continue; + } + if (t == am_trim) { + *optp |= BINARY_SPLIT_TRIM; + l = CDR(list_val(l)); + continue; + } + } + if (!is_tuple(t)) { + goto badarg; + } + tp = tuple_val(t); + if (arityval(*tp) != 2) { + goto badarg; + } + if (tp[1] != am_scope || is_not_tuple(tp[2])) { + goto badarg; + } + tp = tuple_val(tp[2]); + if (arityval(*tp) != 2) { + goto badarg; + } + if (!term_to_Uint(tp[1], &pos)) { + goto badarg; + } + if (!term_to_Sint(tp[2], &len)) { + goto badarg; + } + if (len < 0) { + Uint lentmp = -(Uint)len; + /* overflow */ + if ((Sint)lentmp < 0) { + goto badarg; + } + len = lentmp; + pos -= len; + } + /* overflow */ + if ((pos + len) < pos || (len > 0 && (pos + len) == pos)) { + goto badarg; + } + *endp = len + pos; + *posp = pos; + if ((orig_size = binary_size(bin)) < pos || + orig_size < (*endp)) { + goto badarg; + } + l = CDR(list_val(l)); + } + return 0; + } else { + badarg: + return 1; + } +} + +static BIF_RETTYPE binary_split_trap(BIF_ALIST_3) +{ + int runres; + Eterm result; + Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; + runres = do_binary_split(BIF_P,BIF_ARG_1,0,0,0,NIL,bin,BIF_ARG_2,&result); + if (runres == DO_BIN_MATCH_OK) { + BIF_RET(result); + } else { + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&binary_split_trap_export, BIF_P, BIF_ARG_1, result, + BIF_ARG_3); + } +} + +BIF_RETTYPE binary_split_3(BIF_ALIST_3) +{ + return binary_split(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3); +} + +static BIF_RETTYPE +binary_split(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) +{ + Uint hsflags; + Uint hsstart; + Uint hsend; + Eterm *tp; + Eterm type; + Binary *bin; + Eterm bin_term = NIL; + int runres; + Eterm result; + + if (is_not_binary(arg1)) { + goto badarg; + } + if (parse_split_opts_list(arg3, arg1, &hsstart, 
&hsend, &hsflags)) { + goto badarg; + } + if (hsend == 0) { + tp = HAlloc(p, 2); + result = NIL; + result = CONS(tp, arg1, result); + BIF_RET(result); + } + if (is_tuple(arg2)) { + tp = tuple_val(arg2); + if (arityval(*tp) != 2 || is_not_atom(tp[1])) { + goto badarg; + } + if (((tp[1] != am_bm) && (tp[1] != am_ac)) || + !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + goto badarg; + } + type = tp[1]; + bin = ((ProcBin *) binary_val(tp[2]))->val; + if (type == am_bm && + ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_bm) { + goto badarg; + } + if (type == am_ac && + ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_ac) { + goto badarg; + } + bin_term = tp[2]; + } else if (do_binary_match_compile(arg2, &type, &bin)) { + goto badarg; + } + runres = do_binary_split(p, arg1, hsstart, hsend, hsflags, type, bin, NIL, &result); + if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { + Eterm *hp = HAlloc(p, PROC_BIN_SIZE); + bin_term = erts_mk_magic_binary_term(&hp, &MSO(p), bin); + } else if (bin_term == NIL) { + erts_bin_free(bin); + } + switch(runres) { + case DO_BIN_MATCH_OK: + BIF_RET(result); + case DO_BIN_MATCH_RESTART: + BIF_TRAP3(&binary_split_trap_export, p, arg1, result, bin_term); + default: + goto badarg; + } + badarg: + BIF_ERROR(p,BADARG); +} + + +BIF_RETTYPE binary_split_2(BIF_ALIST_2) +{ + return binary_split(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); +} + + BIF_RETTYPE binary_referenced_byte_size_1(BIF_ALIST_1) { ErlSubBin *sb; diff --git a/lib/stdlib/src/binary.erl b/lib/stdlib/src/binary.erl index af00410572..fb0c395d70 100644 --- a/lib/stdlib/src/binary.erl +++ b/lib/stdlib/src/binary.erl @@ -20,7 +20,7 @@ -module(binary). %% %% Implemented in this module: --export([split/2,split/3,replace/3,replace/4]). +-export([replace/3,replace/4]). -export_type([cp/0]). 
@@ -34,7 +34,8 @@ decode_unsigned/2, encode_unsigned/1, encode_unsigned/2, first/1, last/1, list_to_bin/1, longest_common_prefix/1, longest_common_suffix/1, match/2, match/3, matches/2, - matches/3, part/2, part/3, referenced_byte_size/1]). + matches/3, part/2, part/3, referenced_byte_size/1, + split/2, split/3]). -spec at(Subject, Pos) -> byte() when Subject :: binary(), @@ -198,19 +199,13 @@ part(_, _, _) -> referenced_byte_size(_) -> erlang:nif_error(undef). -%%% End of BIFs. - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -%% split -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -spec split(Subject, Pattern) -> Parts when Subject :: binary(), Pattern :: binary() | [binary()] | cp(), Parts :: [binary()]. -split(H,N) -> - split(H,N,[]). +split(_, _) -> + erlang:nif_error(undef). -spec split(Subject, Pattern, Options) -> Parts when Subject :: binary(), @@ -219,53 +214,10 @@ split(H,N) -> Option :: {scope, part()} | trim | global | trim_all, Parts :: [binary()]. -split(Haystack,Needles,Options) -> - try - {Part,Global,Trim,TrimAll} = - get_opts_split(Options,{no,false,false,false}), - Moptlist = case Part of - no -> - []; - {A,B} -> - [{scope,{A,B}}] - end, - MList = if - Global -> - binary:matches(Haystack,Needles,Moptlist); - true -> - case binary:match(Haystack,Needles,Moptlist) of - nomatch -> []; - Match -> [Match] - end - end, - do_split(Haystack,MList,0,Trim,TrimAll) - catch - _:_ -> - erlang:error(badarg) - end. - -do_split(H,[],N,true,_) when N >= byte_size(H) -> - []; -do_split(H,[],N,_,true) when N >= byte_size(H) -> - []; -do_split(H,[],N,_,_) -> - [binary:part(H,{N,byte_size(H)-N})]; -do_split(H,[{A,B}|T],N,Trim,TrimAll) -> - case binary:part(H,{N,A-N}) of - <<>> when TrimAll == true -> - do_split(H,T,A+B,Trim,TrimAll); - <<>> -> - Rest = do_split(H,T,A+B,Trim,TrimAll), - case {Trim, Rest} of - {true,[]} -> - []; - _ -> - [<<>> | Rest] - end; - Oth -> - [Oth | do_split(H,T,A+B,Trim,TrimAll)] - end. +split(_, _, _) -> + erlang:nif_error(undef). +%%% End of BIFs. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %% replace @@ -352,19 +304,6 @@ splitat(H,N,[I|T]) -> %% Simple helper functions %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -get_opts_split([],{Part,Global,Trim,TrimAll}) -> - {Part,Global,Trim,TrimAll}; -get_opts_split([{scope,{A,B}} | T],{_Part,Global,Trim,TrimAll}) -> - get_opts_split(T,{{A,B},Global,Trim,TrimAll}); -get_opts_split([global | T],{Part,_Global,Trim,TrimAll}) -> - get_opts_split(T,{Part,true,Trim,TrimAll}); -get_opts_split([trim | T],{Part,Global,_Trim,TrimAll}) -> - get_opts_split(T,{Part,Global,true,TrimAll}); -get_opts_split([trim_all | T],{Part,Global,Trim,_TrimAll}) -> - get_opts_split(T,{Part,Global,Trim,true}); -get_opts_split(_,_) -> - throw(badopt). - get_opts_replace([],{Part,Global,Insert}) -> {Part,Global,Insert}; get_opts_replace([{scope,{A,B}} | T],{_Part,Global,Insert}) -> -- cgit v1.2.3 From d1283d1f826bdbd584bc87572ab46001164939e1 Mon Sep 17 00:00:00 2001 From: Andrew Bennett Date: Thu, 25 Jun 2015 11:26:48 -0600 Subject: stdlib: Add BIF option 'trim_all' to binary:split/3 --- bootstrap/lib/stdlib/ebin/binary.beam | Bin 3812 -> 8408 bytes erts/emulator/beam/atom.names | 1 + erts/emulator/beam/erl_bif_binary.c | 496 ++++++++++++++++++++++++---------- 3 files changed, 349 insertions(+), 148 deletions(-) diff --git a/bootstrap/lib/stdlib/ebin/binary.beam b/bootstrap/lib/stdlib/ebin/binary.beam index 666544c492..32d68c2504 100644 Binary files a/bootstrap/lib/stdlib/ebin/binary.beam and b/bootstrap/lib/stdlib/ebin/binary.beam differ diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 3d357886ee..5f27aaa14b 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -586,6 +586,7 @@ atom trace_control_word atom tracer atom trap_exit atom trim +atom trim_all atom try_clause atom true atom tuple diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index 68e5fe23c7..1709f7671d 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ 
b/erts/emulator/beam/erl_bif_binary.c @@ -2544,6 +2544,7 @@ BIF_RETTYPE binary_copy_2(BIF_ALIST_2) #define BINARY_SPLIT_GLOBAL 0x01 #define BINARY_SPLIT_TRIM 0x02 +#define BINARY_SPLIT_TRIM_ALL 0x04 static int do_binary_split(Process *p, Eterm subject, Uint hsstart, Uint hsend, Uint hsflags, Eterm type, Binary *bin, @@ -2616,88 +2617,177 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, Uint bit_size; ErlSubBin *sb; FindallData *fad = state.out; - int i, j, k; + Sint i, j; + Sint drop = 0; + Sint head = 0; + Sint tail; + Uint list_size; + Uint tail_pos; + + tail = state.m - 1; + list_size = state.m + 1; orig_size = binary_size(subject); - j = state.m - 1; - k = (int)(orig_size); - if ((hsflags & BINARY_SPLIT_TRIM) && (orig_size - fad[j].pos - fad[j].len) == 0) { - for (i = (j - 1); i >= 0; --i) { - if ((fad[i+1].pos - fad[i].pos - fad[i].len) != 0) { - break; + tail_pos = (Uint)(orig_size); + + if (hsflags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL)) { + if ((orig_size - fad[tail].pos - fad[tail].len) == 0) { + list_size--; + for (i = (tail - 1); i >= 0; --i) { + if ((fad[i+1].pos - fad[i].pos - fad[i].len) != 0) { + break; + } + list_size--; } + if (i == -1) { + if (fad[head].pos == 0) { + ret = NIL; + } else { + hp = HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = fad[head].pos; + sb->offs = offset; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = bit_size; + sb->is_writable = 0; + fad[head].epos = make_binary(sb); + hp += ERL_SUB_BIN_SIZE; + + ret = NIL; + ret = CONS(hp, make_binary(sb), ret); + hp += 2; + } + erts_free_aligned_binary_bytes(temp_alloc); + bm_clean_find_all(&state); + BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); + *res_term = ret; + return DO_BIN_MATCH_OK; + } + tail = i; + tail_pos = fad[tail+1].pos; } - if (i == -1) { - if (fad[0].pos == 0) { - ret = NIL; - } else { - hp = 
HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + } + if (hsflags & BINARY_SPLIT_TRIM_ALL) { + if (fad[head].pos == 0) { + drop++; + list_size--; + for (i = drop, j = tail; i <= j; ++i) { + if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) != 0) { + break; + } + drop++; + list_size--; + } + head = drop - 1; + } + for (i = (head+1), j = tail; i <= j; ++i) { + if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) == 0) { + list_size--; + } + } + + hp = HAlloc(p, list_size * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + if (drop == 0) { + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = fad[head].pos; + sb->offs = offset; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = 0; + sb->is_writable = 0; + fad[head].epos = make_binary(sb); + hp += ERL_SUB_BIN_SIZE; + } + + for (i = (head+1), j = tail; i <= j; ++i) { + if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) != 0) { sb = (ErlSubBin *)(hp); sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[0].pos; - sb->offs = offset; + sb->size = fad[i].pos - fad[i-1].pos - fad[i-1].len; + sb->offs = offset + fad[i-1].pos + fad[i-1].len; sb->orig = orig; sb->bitoffs = bit_offset; - sb->bitsize = bit_size; + sb->bitsize = 0; sb->is_writable = 0; - fad[0].epos = make_binary(sb); + fad[i].epos = make_binary(sb); hp += ERL_SUB_BIN_SIZE; + } + } - ret = NIL; - ret = CONS(hp, make_binary(sb), ret); + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = tail_pos - fad[tail].pos - fad[tail].len; + sb->offs = offset + fad[tail].pos + fad[tail].len; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = bit_size; + sb->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + + ret = NIL; + ret = CONS(hp, make_binary(sb), ret); + hp += 2; + for (i = tail, j = head; i > j; --i) { + if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) != 0) { + ret = CONS(hp, fad[i].epos, ret); hp += 2; } - 
erts_free_aligned_binary_bytes(temp_alloc); - bm_clean_find_all(&state); - BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; } - j = i; - k = fad[j+1].pos; - } - hp = HAlloc(p, (j + 2) * (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[0].pos; - sb->offs = offset; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = 0; - sb->is_writable = 0; - fad[0].epos = make_binary(sb); - hp += ERL_SUB_BIN_SIZE; - - for (i = 1; i <= j; ++i) { + if (drop == 0) { + ret = CONS(hp, fad[head].epos, ret); + hp += 2; + } + } else { + hp = HAlloc(p, list_size * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb = (ErlSubBin *)(hp); sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[i].pos - fad[i-1].pos - fad[i-1].len; - sb->offs = offset + fad[i-1].pos + fad[i-1].len; + sb->size = fad[head].pos; + sb->offs = offset; sb->orig = orig; sb->bitoffs = bit_offset; sb->bitsize = 0; sb->is_writable = 0; - fad[i].epos = make_binary(sb); + fad[head].epos = make_binary(sb); hp += ERL_SUB_BIN_SIZE; - } - ret = NIL; - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = k - fad[j].pos - fad[j].len; - sb->offs = offset + fad[j].pos + fad[j].len; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = bit_size; - sb->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - ret = CONS(hp, make_binary(sb), ret); - hp += 2; - for (i = j; i >= 0; --i) { - ret = CONS(hp, fad[i].epos, ret); + + for (i = (head+1), j = tail; i <= j; ++i) { + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = fad[i].pos - fad[i-1].pos - fad[i-1].len; + sb->offs = offset + fad[i-1].pos + fad[i-1].len; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = 0; + sb->is_writable = 0; + fad[i].epos = make_binary(sb); + hp += ERL_SUB_BIN_SIZE; + } + + sb = (ErlSubBin 
*)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = tail_pos - fad[tail].pos - fad[tail].len; + sb->offs = offset + fad[tail].pos + fad[tail].len; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = bit_size; + sb->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + + ret = NIL; + ret = CONS(hp, make_binary(sb), ret); hp += 2; + for (i = tail, j = head; i >= j; --i) { + ret = CONS(hp, fad[i].epos, ret); + hp += 2; + } } } erts_free_aligned_binary_bytes(temp_alloc); @@ -2752,88 +2842,177 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, Uint bit_size; ErlSubBin *sb; FindallData *fad = state.out; - int i, j, k; + Sint i, j; + Sint drop = 0; + Sint head = 0; + Sint tail; + Uint list_size; + Uint tail_pos; + + tail = state.m - 1; + list_size = state.m + 1; orig_size = binary_size(subject); - j = state.m - 1; - k = (int)(orig_size); - if ((hsflags & BINARY_SPLIT_TRIM) && (orig_size - fad[j].pos - fad[j].len) == 0) { - for (i = (j - 1); i >= 0; --i) { - if ((fad[i+1].pos - fad[i].pos - fad[i].len) != 0) { - break; + tail_pos = (Uint)(orig_size); + + if (hsflags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL)) { + if ((orig_size - fad[tail].pos - fad[tail].len) == 0) { + list_size--; + for (i = (tail - 1); i >= 0; --i) { + if ((fad[i+1].pos - fad[i].pos - fad[i].len) != 0) { + break; + } + list_size--; } + if (i == -1) { + if (fad[head].pos == 0) { + ret = NIL; + } else { + hp = HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = fad[head].pos; + sb->offs = offset; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = bit_size; + sb->is_writable = 0; + fad[head].epos = make_binary(sb); + hp += ERL_SUB_BIN_SIZE; + + ret = NIL; + ret = CONS(hp, make_binary(sb), ret); + hp += 2; + } + erts_free_aligned_binary_bytes(temp_alloc); + ac_clean_find_all(&state); + BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); + 
*res_term = ret; + return DO_BIN_MATCH_OK; + } + tail = i; + tail_pos = fad[tail+1].pos; + } + } + if (hsflags & BINARY_SPLIT_TRIM_ALL) { + if (fad[head].pos == 0) { + drop++; + list_size--; + for (i = drop, j = tail; i <= j; ++i) { + if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) != 0) { + break; + } + drop++; + list_size--; + } + head = drop - 1; + } + for (i = (head+1), j = tail; i <= j; ++i) { + if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) == 0) { + list_size--; + } + } + + hp = HAlloc(p, list_size * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + + if (drop == 0) { + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = fad[head].pos; + sb->offs = offset; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = 0; + sb->is_writable = 0; + fad[head].epos = make_binary(sb); + hp += ERL_SUB_BIN_SIZE; } - if (i == -1) { - if (fad[0].pos == 0) { - ret = NIL; - } else { - hp = HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + for (i = (head+1), j = tail; i <= j; ++i) { + if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) != 0) { sb = (ErlSubBin *)(hp); sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[0].pos; - sb->offs = offset; + sb->size = fad[i].pos - fad[i-1].pos - fad[i-1].len; + sb->offs = offset + fad[i-1].pos + fad[i-1].len; sb->orig = orig; sb->bitoffs = bit_offset; - sb->bitsize = bit_size; + sb->bitsize = 0; sb->is_writable = 0; - fad[0].epos = make_binary(sb); + fad[i].epos = make_binary(sb); hp += ERL_SUB_BIN_SIZE; + } + } - ret = NIL; - ret = CONS(hp, make_binary(sb), ret); + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = tail_pos - fad[tail].pos - fad[tail].len; + sb->offs = offset + fad[tail].pos + fad[tail].len; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = bit_size; + sb->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + + ret = NIL; + ret = CONS(hp, make_binary(sb), ret); + hp += 2; + for (i 
= tail, j = head; i > j; --i) { + if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) != 0) { + ret = CONS(hp, fad[i].epos, ret); hp += 2; } - erts_free_aligned_binary_bytes(temp_alloc); - ac_clean_find_all(&state); - BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; } - j = i; - k = fad[j+1].pos; - } - hp = HAlloc(p, (j + 2) * (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[0].pos; - sb->offs = offset; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = 0; - sb->is_writable = 0; - fad[0].epos = make_binary(sb); - hp += ERL_SUB_BIN_SIZE; - - for (i = 1; i <= j; ++i) { + if (drop == 0) { + ret = CONS(hp, fad[head].epos, ret); + hp += 2; + } + } else { + hp = HAlloc(p, list_size * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb = (ErlSubBin *)(hp); sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[i].pos - fad[i-1].pos - fad[i-1].len; - sb->offs = offset + fad[i-1].pos + fad[i-1].len; + sb->size = fad[head].pos; + sb->offs = offset; sb->orig = orig; sb->bitoffs = bit_offset; sb->bitsize = 0; sb->is_writable = 0; - fad[i].epos = make_binary(sb); + fad[head].epos = make_binary(sb); hp += ERL_SUB_BIN_SIZE; - } - ret = NIL; - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = k - fad[j].pos - fad[j].len; - sb->offs = offset + fad[j].pos + fad[j].len; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = bit_size; - sb->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - ret = CONS(hp, make_binary(sb), ret); - hp += 2; - for (i = j; i >= 0; --i) { - ret = CONS(hp, fad[i].epos, ret); + + for (i = (head+1), j = tail; i <= j; ++i) { + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = fad[i].pos - fad[i-1].pos - fad[i-1].len; + sb->offs = offset + fad[i-1].pos + fad[i-1].len; + sb->orig = orig; + sb->bitoffs = 
bit_offset; + sb->bitsize = 0; + sb->is_writable = 0; + fad[i].epos = make_binary(sb); + hp += ERL_SUB_BIN_SIZE; + } + + sb = (ErlSubBin *)(hp); + sb->thing_word = HEADER_SUB_BIN; + sb->size = tail_pos - fad[tail].pos - fad[tail].len; + sb->offs = offset + fad[tail].pos + fad[tail].len; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = bit_size; + sb->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + + ret = NIL; + ret = CONS(hp, make_binary(sb), ret); hp += 2; + for (i = tail, j = head; i >= j; --i) { + ret = CONS(hp, fad[i].epos, ret); + hp += 2; + } } } erts_free_aligned_binary_bytes(temp_alloc); @@ -2898,7 +3077,7 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, orig_size = binary_size(subject); - if ((hsflags & BINARY_SPLIT_TRIM) && (orig_size - pos - bm->len) == 0) { + if ((hsflags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL)) && (orig_size - pos - bm->len) == 0) { if (pos == 0) { ret = NIL; } else { @@ -2919,17 +3098,23 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, hp += 2; } } else { - hp = HAlloc(p, 2 * (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - sb1 = (ErlSubBin *) hp; - sb1->thing_word = HEADER_SUB_BIN; - sb1->size = pos; - sb1->offs = offset; - sb1->orig = orig; - sb1->bitoffs = bit_offset; - sb1->bitsize = 0; - sb1->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; + if ((hsflags & BINARY_SPLIT_TRIM_ALL) && (pos == 0)) { + hp = HAlloc(p, 1 * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb1 = NULL; + } else { + hp = HAlloc(p, 2 * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb1 = (ErlSubBin *) hp; + sb1->thing_word = HEADER_SUB_BIN; + sb1->size = pos; + sb1->offs = offset; + sb1->orig = orig; + sb1->bitoffs = bit_offset; + sb1->bitsize = 0; + sb1->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + } sb2 = (ErlSubBin *) hp; sb2->thing_word = HEADER_SUB_BIN; @@ -2944,8 
+3129,10 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, ret = NIL; ret = CONS(hp, make_binary(sb2), ret); hp += 2; - ret = CONS(hp, make_binary(sb1), ret); - hp += 2; + if (sb1 != NULL) { + ret = CONS(hp, make_binary(sb1), ret); + hp += 2; + } } } erts_free_aligned_binary_bytes(temp_alloc); @@ -3003,7 +3190,7 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, orig_size = binary_size(subject); - if ((hsflags & BINARY_SPLIT_TRIM) && (orig_size - pos - rlen) == 0) { + if ((hsflags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL)) && (orig_size - pos - rlen) == 0) { if (pos == 0) { ret = NIL; } else { @@ -3024,17 +3211,23 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, hp += 2; } } else { - hp = HAlloc(p, 2 * (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - sb1 = (ErlSubBin *) hp; - sb1->thing_word = HEADER_SUB_BIN; - sb1->size = pos; - sb1->offs = offset; - sb1->orig = orig; - sb1->bitoffs = bit_offset; - sb1->bitsize = 0; - sb1->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; + if ((hsflags & BINARY_SPLIT_TRIM_ALL) && (pos == 0)) { + hp = HAlloc(p, 2 * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb1 = NULL; + } else { + hp = HAlloc(p, 2 * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb1 = (ErlSubBin *) hp; + sb1->thing_word = HEADER_SUB_BIN; + sb1->size = pos; + sb1->offs = offset; + sb1->orig = orig; + sb1->bitoffs = bit_offset; + sb1->bitsize = 0; + sb1->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + } sb2 = (ErlSubBin *) hp; sb2->thing_word = HEADER_SUB_BIN; @@ -3049,8 +3242,10 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, ret = NIL; ret = CONS(hp, make_binary(sb2), ret); hp += 2; - ret = CONS(hp, make_binary(sb1), ret); - hp += 2; + if (sb1 != NULL) { + ret = CONS(hp, make_binary(sb1), ret); + hp += 2; + } } } 
erts_free_aligned_binary_bytes(temp_alloc); @@ -3088,6 +3283,11 @@ static int parse_split_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp, Uin l = CDR(list_val(l)); continue; } + if (t == am_trim_all) { + *optp |= BINARY_SPLIT_TRIM_ALL; + l = CDR(list_val(l)); + continue; + } } if (!is_tuple(t)) { goto badarg; -- cgit v1.2.3 From 20855f1819f91eeeb1fa746186477d3824024500 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Wed, 26 Aug 2015 15:20:45 +0200 Subject: erts: Replace 0 with THE_NON_VALUE --- erts/emulator/beam/erl_bif_binary.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index 1709f7671d..860c9a9779 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -1305,7 +1305,7 @@ static int parse_match_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp) Eterm *tp; Uint pos; Sint len; - if (l == ((Eterm) 0) || l == NIL) { + if (l == THE_NON_VALUE || l == NIL) { /* Invalid term or NIL, we're called from binary_match(es)_2 or have no options*/ *posp = 0; @@ -1535,13 +1535,13 @@ binary_matches(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) BIF_RETTYPE binary_match_2(BIF_ALIST_2) { - return binary_match(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); + return binary_match(BIF_P, BIF_ARG_1, BIF_ARG_2, THE_NON_VALUE); } BIF_RETTYPE binary_matches_2(BIF_ALIST_2) { - return binary_matches(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); + return binary_matches(BIF_P, BIF_ARG_1, BIF_ARG_2, THE_NON_VALUE); } @@ -3266,7 +3266,7 @@ static int parse_split_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp, Uin *optp = 0; *posp = 0; *endp = binary_size(bin); - if (l == ((Eterm) 0) || l == NIL) { + if (l == THE_NON_VALUE || l == NIL) { return 0; } else if (is_list(l)) { while(is_list(l)) { @@ -3427,7 +3427,7 @@ binary_split(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) BIF_RETTYPE binary_split_2(BIF_ALIST_2) { - return 
binary_split(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); + return binary_split(BIF_P, BIF_ARG_1, BIF_ARG_2, THE_NON_VALUE); } -- cgit v1.2.3 From 5b600aac42fdc4d08fabba682d7803351c9bfbdb Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Wed, 26 Aug 2015 15:43:00 +0200 Subject: erts: Refactor backend of binary:split to reduce code volume. --- erts/emulator/beam/erl_bif_binary.c | 659 +++++++++--------------------------- 1 file changed, 151 insertions(+), 508 deletions(-) diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index 860c9a9779..6adc61df19 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -2542,6 +2542,11 @@ BIF_RETTYPE binary_copy_2(BIF_ALIST_2) return do_binary_copy(BIF_P,BIF_ARG_1,BIF_ARG_2); } +static Eterm do_split_single_result(Process*, Eterm subject, + Sint pos, Sint len, Uint hsflags); +static Eterm do_split_global_result(Process*, FindallData *fad, Uint fad_sz, + Eterm subject, Uint hsflags); + #define BINARY_SPLIT_GLOBAL 0x01 #define BINARY_SPLIT_TRIM 0x02 #define BINARY_SPLIT_TRIM_ALL 0x04 @@ -2571,7 +2576,6 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, if (type == am_bm) { BMData *bm; Sint pos; - Eterm ret; Eterm *hp; BMFindAllState state; Uint reds = get_reds(p, BM_LOOP_FACTOR); @@ -2591,8 +2595,7 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, pos = bm_find_all_non_overlapping(&state, bm, bytes, &reds); if (pos == BM_NOT_FOUND) { hp = HAlloc(p, 2); - ret = NIL; - ret = CONS(hp, subject, ret); + *res_term = CONS(hp, subject, NIL); } else if (pos == BM_RESTART) { int x = (SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + @@ -2610,196 +2613,16 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, bm_clean_find_all(&state); return DO_BIN_MATCH_RESTART; } else { - size_t orig_size; - Eterm orig; - Uint offset; - Uint bit_offset; - Uint bit_size; - ErlSubBin *sb; - FindallData *fad = 
state.out; - Sint i, j; - Sint drop = 0; - Sint head = 0; - Sint tail; - Uint list_size; - Uint tail_pos; - - tail = state.m - 1; - list_size = state.m + 1; - orig_size = binary_size(subject); - tail_pos = (Uint)(orig_size); - - if (hsflags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL)) { - if ((orig_size - fad[tail].pos - fad[tail].len) == 0) { - list_size--; - for (i = (tail - 1); i >= 0; --i) { - if ((fad[i+1].pos - fad[i].pos - fad[i].len) != 0) { - break; - } - list_size--; - } - if (i == -1) { - if (fad[head].pos == 0) { - ret = NIL; - } else { - hp = HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[head].pos; - sb->offs = offset; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = bit_size; - sb->is_writable = 0; - fad[head].epos = make_binary(sb); - hp += ERL_SUB_BIN_SIZE; - - ret = NIL; - ret = CONS(hp, make_binary(sb), ret); - hp += 2; - } - erts_free_aligned_binary_bytes(temp_alloc); - bm_clean_find_all(&state); - BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; - } - tail = i; - tail_pos = fad[tail+1].pos; - } - } - if (hsflags & BINARY_SPLIT_TRIM_ALL) { - if (fad[head].pos == 0) { - drop++; - list_size--; - for (i = drop, j = tail; i <= j; ++i) { - if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) != 0) { - break; - } - drop++; - list_size--; - } - head = drop - 1; - } - for (i = (head+1), j = tail; i <= j; ++i) { - if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) == 0) { - list_size--; - } - } - - hp = HAlloc(p, list_size * (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - - if (drop == 0) { - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[head].pos; - sb->offs = offset; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = 0; - sb->is_writable = 0; - fad[head].epos = make_binary(sb); - hp 
+= ERL_SUB_BIN_SIZE; - } - - for (i = (head+1), j = tail; i <= j; ++i) { - if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) != 0) { - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[i].pos - fad[i-1].pos - fad[i-1].len; - sb->offs = offset + fad[i-1].pos + fad[i-1].len; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = 0; - sb->is_writable = 0; - fad[i].epos = make_binary(sb); - hp += ERL_SUB_BIN_SIZE; - } - } - - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = tail_pos - fad[tail].pos - fad[tail].len; - sb->offs = offset + fad[tail].pos + fad[tail].len; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = bit_size; - sb->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - - ret = NIL; - ret = CONS(hp, make_binary(sb), ret); - hp += 2; - for (i = tail, j = head; i > j; --i) { - if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) != 0) { - ret = CONS(hp, fad[i].epos, ret); - hp += 2; - } - } - if (drop == 0) { - ret = CONS(hp, fad[head].epos, ret); - hp += 2; - } - } else { - hp = HAlloc(p, list_size * (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[head].pos; - sb->offs = offset; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = 0; - sb->is_writable = 0; - fad[head].epos = make_binary(sb); - hp += ERL_SUB_BIN_SIZE; - - for (i = (head+1), j = tail; i <= j; ++i) { - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[i].pos - fad[i-1].pos - fad[i-1].len; - sb->offs = offset + fad[i-1].pos + fad[i-1].len; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = 0; - sb->is_writable = 0; - fad[i].epos = make_binary(sb); - hp += ERL_SUB_BIN_SIZE; - } - - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = tail_pos - fad[tail].pos - fad[tail].len; - sb->offs = offset + fad[tail].pos + fad[tail].len; - sb->orig = orig; - 
sb->bitoffs = bit_offset; - sb->bitsize = bit_size; - sb->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - - ret = NIL; - ret = CONS(hp, make_binary(sb), ret); - hp += 2; - for (i = tail, j = head; i >= j; --i) { - ret = CONS(hp, fad[i].epos, ret); - hp += 2; - } - } + *res_term = do_split_global_result(p, state.out, state.m, subject, hsflags); } erts_free_aligned_binary_bytes(temp_alloc); bm_clean_find_all(&state); BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); - *res_term = ret; return DO_BIN_MATCH_OK; } else if (type == am_ac) { ACTrie *act; int acr; ACFindAllState state; - Eterm ret; Eterm *hp; Uint reds = get_reds(p, AC_LOOP_FACTOR); Uint save_reds = reds; @@ -2817,8 +2640,7 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, acr = ac_find_all_non_overlapping(&state, bytes, &reds); if (acr == AC_NOT_FOUND) { hp = HAlloc(p, 2); - ret = NIL; - ret = CONS(hp, subject, ret); + *res_term = CONS(hp, subject, NIL); } else if (acr == AC_RESTART) { int x = (SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + !!(SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); @@ -2835,197 +2657,17 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, ac_clean_find_all(&state); return DO_BIN_MATCH_RESTART; } else { - size_t orig_size; - Eterm orig; - Uint offset; - Uint bit_offset; - Uint bit_size; - ErlSubBin *sb; - FindallData *fad = state.out; - Sint i, j; - Sint drop = 0; - Sint head = 0; - Sint tail; - Uint list_size; - Uint tail_pos; - - tail = state.m - 1; - list_size = state.m + 1; - orig_size = binary_size(subject); - tail_pos = (Uint)(orig_size); - - if (hsflags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL)) { - if ((orig_size - fad[tail].pos - fad[tail].len) == 0) { - list_size--; - for (i = (tail - 1); i >= 0; --i) { - if ((fad[i+1].pos - fad[i].pos - fad[i].len) != 0) { - break; - } - list_size--; - } - if (i == -1) { - if (fad[head].pos == 0) { - ret = NIL; - } else { - hp = HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); - 
ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[head].pos; - sb->offs = offset; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = bit_size; - sb->is_writable = 0; - fad[head].epos = make_binary(sb); - hp += ERL_SUB_BIN_SIZE; - - ret = NIL; - ret = CONS(hp, make_binary(sb), ret); - hp += 2; - } - erts_free_aligned_binary_bytes(temp_alloc); - ac_clean_find_all(&state); - BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; - } - tail = i; - tail_pos = fad[tail+1].pos; - } - } - if (hsflags & BINARY_SPLIT_TRIM_ALL) { - if (fad[head].pos == 0) { - drop++; - list_size--; - for (i = drop, j = tail; i <= j; ++i) { - if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) != 0) { - break; - } - drop++; - list_size--; - } - head = drop - 1; - } - for (i = (head+1), j = tail; i <= j; ++i) { - if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) == 0) { - list_size--; - } - } - - hp = HAlloc(p, list_size * (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - - if (drop == 0) { - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[head].pos; - sb->offs = offset; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = 0; - sb->is_writable = 0; - fad[head].epos = make_binary(sb); - hp += ERL_SUB_BIN_SIZE; - } - - for (i = (head+1), j = tail; i <= j; ++i) { - if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) != 0) { - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[i].pos - fad[i-1].pos - fad[i-1].len; - sb->offs = offset + fad[i-1].pos + fad[i-1].len; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = 0; - sb->is_writable = 0; - fad[i].epos = make_binary(sb); - hp += ERL_SUB_BIN_SIZE; - } - } - - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = tail_pos - fad[tail].pos - fad[tail].len; - sb->offs = offset + 
fad[tail].pos + fad[tail].len; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = bit_size; - sb->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - - ret = NIL; - ret = CONS(hp, make_binary(sb), ret); - hp += 2; - for (i = tail, j = head; i > j; --i) { - if ((fad[i].pos - fad[i-1].pos - fad[i-1].len) != 0) { - ret = CONS(hp, fad[i].epos, ret); - hp += 2; - } - } - if (drop == 0) { - ret = CONS(hp, fad[head].epos, ret); - hp += 2; - } - } else { - hp = HAlloc(p, list_size * (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[head].pos; - sb->offs = offset; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = 0; - sb->is_writable = 0; - fad[head].epos = make_binary(sb); - hp += ERL_SUB_BIN_SIZE; - - for (i = (head+1), j = tail; i <= j; ++i) { - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = fad[i].pos - fad[i-1].pos - fad[i-1].len; - sb->offs = offset + fad[i-1].pos + fad[i-1].len; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = 0; - sb->is_writable = 0; - fad[i].epos = make_binary(sb); - hp += ERL_SUB_BIN_SIZE; - } - - sb = (ErlSubBin *)(hp); - sb->thing_word = HEADER_SUB_BIN; - sb->size = tail_pos - fad[tail].pos - fad[tail].len; - sb->offs = offset + fad[tail].pos + fad[tail].len; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = bit_size; - sb->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - - ret = NIL; - ret = CONS(hp, make_binary(sb), ret); - hp += 2; - for (i = tail, j = head; i >= j; --i) { - ret = CONS(hp, fad[i].epos, ret); - hp += 2; - } - } + *res_term = do_split_global_result(p, state.out, state.m, subject, hsflags); } erts_free_aligned_binary_bytes(temp_alloc); ac_clean_find_all(&state); BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); - *res_term = ret; return DO_BIN_MATCH_OK; } } else { if (type == am_bm) { BMData *bm; Sint pos; - Eterm ret; Eterm *hp; 
BMFindFirstState state; Uint reds = get_reds(p, BM_LOOP_FACTOR); @@ -3049,8 +2691,7 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, pos = bm_find_first_match(&state, bm, bytes, &reds); if (pos == BM_NOT_FOUND) { hp = HAlloc(p, 2); - ret = NIL; - ret = CONS(hp, subject, ret); + *res_term = CONS(hp, subject, NIL); } else if (pos == BM_RESTART) { int x = (sizeof(state) / sizeof(Eterm)) + @@ -3067,84 +2708,16 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, erts_free_aligned_binary_bytes(temp_alloc); return DO_BIN_MATCH_RESTART; } else { - size_t orig_size; - Eterm orig; - Uint offset; - Uint bit_offset; - Uint bit_size; - ErlSubBin *sb1; - ErlSubBin *sb2; - - orig_size = binary_size(subject); - - if ((hsflags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL)) && (orig_size - pos - bm->len) == 0) { - if (pos == 0) { - ret = NIL; - } else { - hp = HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - sb1 = (ErlSubBin *) hp; - sb1->thing_word = HEADER_SUB_BIN; - sb1->size = pos; - sb1->offs = offset; - sb1->orig = orig; - sb1->bitoffs = bit_offset; - sb1->bitsize = bit_size; - sb1->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - - ret = NIL; - ret = CONS(hp, make_binary(sb1), ret); - hp += 2; - } - } else { - if ((hsflags & BINARY_SPLIT_TRIM_ALL) && (pos == 0)) { - hp = HAlloc(p, 1 * (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - sb1 = NULL; - } else { - hp = HAlloc(p, 2 * (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - sb1 = (ErlSubBin *) hp; - sb1->thing_word = HEADER_SUB_BIN; - sb1->size = pos; - sb1->offs = offset; - sb1->orig = orig; - sb1->bitoffs = bit_offset; - sb1->bitsize = 0; - sb1->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - } - - sb2 = (ErlSubBin *) hp; - sb2->thing_word = HEADER_SUB_BIN; - sb2->size = orig_size - pos - bm->len; - sb2->offs = offset + pos + bm->len; - sb2->orig = 
orig; - sb2->bitoffs = bit_offset; - sb2->bitsize = bit_size; - sb2->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - - ret = NIL; - ret = CONS(hp, make_binary(sb2), ret); - hp += 2; - if (sb1 != NULL) { - ret = CONS(hp, make_binary(sb1), ret); - hp += 2; - } - } + *res_term = do_split_single_result(p, subject, pos, bm->len, hsflags); } erts_free_aligned_binary_bytes(temp_alloc); BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); - *res_term = ret; return DO_BIN_MATCH_OK; } else if (type == am_ac) { ACTrie *act; Uint pos, rlen; int acr; ACFindFirstState state; - Eterm ret; Eterm *hp; Uint reds = get_reds(p, AC_LOOP_FACTOR); Uint save_reds = reds; @@ -3162,8 +2735,7 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, acr = ac_find_first_match(&state, bytes, &pos, &rlen, &reds); if (acr == AC_NOT_FOUND) { hp = HAlloc(p, 2); - ret = NIL; - ret = CONS(hp, subject, ret); + *res_term = CONS(hp, subject, NIL); } else if (acr == AC_RESTART) { int x = (sizeof(state) / sizeof(Eterm)) + @@ -3180,77 +2752,10 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, erts_free_aligned_binary_bytes(temp_alloc); return DO_BIN_MATCH_RESTART; } else { - size_t orig_size; - Eterm orig; - Uint offset; - Uint bit_offset; - Uint bit_size; - ErlSubBin *sb1; - ErlSubBin *sb2; - - orig_size = binary_size(subject); - - if ((hsflags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL)) && (orig_size - pos - rlen) == 0) { - if (pos == 0) { - ret = NIL; - } else { - hp = HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - sb1 = (ErlSubBin *) hp; - sb1->thing_word = HEADER_SUB_BIN; - sb1->size = pos; - sb1->offs = offset; - sb1->orig = orig; - sb1->bitoffs = bit_offset; - sb1->bitsize = bit_size; - sb1->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - - ret = NIL; - ret = CONS(hp, make_binary(sb1), ret); - hp += 2; - } - } else { - if ((hsflags & BINARY_SPLIT_TRIM_ALL) && (pos == 0)) { - hp = HAlloc(p, 2 * (ERL_SUB_BIN_SIZE + 
2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - sb1 = NULL; - } else { - hp = HAlloc(p, 2 * (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - sb1 = (ErlSubBin *) hp; - sb1->thing_word = HEADER_SUB_BIN; - sb1->size = pos; - sb1->offs = offset; - sb1->orig = orig; - sb1->bitoffs = bit_offset; - sb1->bitsize = 0; - sb1->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - } - - sb2 = (ErlSubBin *) hp; - sb2->thing_word = HEADER_SUB_BIN; - sb2->size = orig_size - pos - rlen; - sb2->offs = offset + pos + rlen; - sb2->orig = orig; - sb2->bitoffs = bit_offset; - sb2->bitsize = bit_size; - sb2->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - - ret = NIL; - ret = CONS(hp, make_binary(sb2), ret); - hp += 2; - if (sb1 != NULL) { - ret = CONS(hp, make_binary(sb1), ret); - hp += 2; - } - } + *res_term = do_split_single_result(p, subject, pos, rlen, hsflags); } erts_free_aligned_binary_bytes(temp_alloc); BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); - *res_term = ret; return DO_BIN_MATCH_OK; } } @@ -3258,6 +2763,144 @@ static int do_binary_split(Process *p, Eterm subject, Uint hsstart, return DO_BIN_MATCH_BADARG; } +static Eterm do_split_single_result(Process* p, Eterm subject, + Sint pos, Sint len, Uint hsflags) +{ + size_t orig_size; + Eterm orig; + Uint offset; + Uint bit_offset; + Uint bit_size; + ErlSubBin *sb1; + ErlSubBin *sb2; + Eterm* hp; + Eterm ret; + + orig_size = binary_size(subject); + + if ((hsflags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL)) && (orig_size - pos - len) == 0) { + if (pos == 0) { /* match covers the whole subject: both parts are empty and trimmed away */ + ret = NIL; + } else { + hp = HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb1 = (ErlSubBin *) hp; + sb1->thing_word = HEADER_SUB_BIN; + sb1->size = pos; + sb1->offs = offset; + sb1->orig = orig; + sb1->bitoffs = bit_offset; + sb1->bitsize = bit_size; + sb1->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + + ret = CONS(hp, make_binary(sb1), NIL); + hp += 
2; + } + } else { + if ((hsflags & BINARY_SPLIT_TRIM_ALL) && (pos == 0)) { + hp = HAlloc(p, 1 * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb1 = NULL; + } else { + hp = HAlloc(p, 2 * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb1 = (ErlSubBin *) hp; + sb1->thing_word = HEADER_SUB_BIN; + sb1->size = pos; + sb1->offs = offset; + sb1->orig = orig; + sb1->bitoffs = bit_offset; + sb1->bitsize = 0; + sb1->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + } + + sb2 = (ErlSubBin *) hp; + sb2->thing_word = HEADER_SUB_BIN; + sb2->size = orig_size - pos - len; + sb2->offs = offset + pos + len; + sb2->orig = orig; + sb2->bitoffs = bit_offset; + sb2->bitsize = bit_size; + sb2->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + + ret = CONS(hp, make_binary(sb2), NIL); + hp += 2; + if (sb1 != NULL) { + ret = CONS(hp, make_binary(sb1), ret); + hp += 2; + } + } + return ret; +} + +static Eterm do_split_global_result(Process* p, FindallData *fad, Uint fad_sz, + Eterm subject, Uint hsflags) +{ + size_t orig_size; + Eterm orig; + Uint offset; + Uint bit_offset; + Uint bit_size; + ErlSubBin *sb; + Sint i; + Sint tail; + Uint list_size; + Uint end_pos; + Uint do_trim = hsflags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL); + Eterm* hp; + Eterm* hendp; + Eterm ret; + + tail = fad_sz - 1; + list_size = fad_sz + 1; + orig_size = binary_size(subject); + end_pos = (Uint)(orig_size); + + hp = HAlloc(p, list_size * (ERL_SUB_BIN_SIZE + 2)); + hendp = hp + list_size * (ERL_SUB_BIN_SIZE + 2); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + ASSERT(bit_size == 0); + + ret = NIL; + + for (i = tail; i >= 0; --i) { + sb = (ErlSubBin *)(hp); + sb->size = end_pos - (fad[i].pos + fad[i].len); + if (!(sb->size == 0 && do_trim)) { + sb->thing_word = HEADER_SUB_BIN; + sb->offs = offset + fad[i].pos + fad[i].len; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = 0; + sb->is_writable = 0; + 
hp += ERL_SUB_BIN_SIZE; + ret = CONS(hp, make_binary(sb), ret); + hp += 2; + do_trim &= ~BINARY_SPLIT_TRIM; + } + end_pos = fad[i].pos; + } + + sb = (ErlSubBin *)(hp); + sb->size = fad[0].pos; + if (!(sb->size == 0 && do_trim)) { + sb->thing_word = HEADER_SUB_BIN; + sb->offs = offset; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = 0; + sb->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + ret = CONS(hp, make_binary(sb), ret); + hp += 2; + } + HRelease(p, hendp, hp); + return ret; +} + static int parse_split_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp, Uint *optp) { Eterm *tp; -- cgit v1.2.3 From 55777538791419a6c3d86c1c440c7eb7fbdd5e51 Mon Sep 17 00:00:00 2001 From: Andrew Bennett Date: Thu, 10 Sep 2015 13:40:21 -0600 Subject: erts: Refactor BIF for binary:match,matches,split with an common do_binary_find() used by match, matches and split. --- erts/emulator/beam/atom.names | 4 +- erts/emulator/beam/erl_bif_binary.c | 1659 ++++++++++++++--------------------- 2 files changed, 678 insertions(+), 985 deletions(-) diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 5f27aaa14b..7a50b24818 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -117,11 +117,9 @@ atom bif_timer_server atom binary atom binary_bin_to_list_trap atom binary_copy_trap +atom binary_find_trap atom binary_longest_prefix_trap atom binary_longest_suffix_trap -atom binary_match_trap -atom binary_matches_trap -atom binary_split_trap atom binary_to_list_continue atom binary_to_term_trap atom block diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index 6adc61df19..9f72b8c0ac 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -55,10 +55,8 @@ /* Init and local variables */ -static Export binary_match_trap_export; -static BIF_RETTYPE binary_match_trap(BIF_ALIST_3); -static Export binary_matches_trap_export; -static BIF_RETTYPE 
binary_matches_trap(BIF_ALIST_3); +static Export binary_find_trap_export; +static BIF_RETTYPE binary_find_trap(BIF_ALIST_3); static Export binary_longest_prefix_trap_export; static BIF_RETTYPE binary_longest_prefix_trap(BIF_ALIST_3); static Export binary_longest_suffix_trap_export; @@ -67,26 +65,18 @@ static Export binary_bin_to_list_trap_export; static BIF_RETTYPE binary_bin_to_list_trap(BIF_ALIST_3); static Export binary_copy_trap_export; static BIF_RETTYPE binary_copy_trap(BIF_ALIST_2); -static Export binary_split_trap_export; -static BIF_RETTYPE binary_split_trap(BIF_ALIST_3); static Uint max_loop_limit; static BIF_RETTYPE -binary_match(Process *p, Eterm arg1, Eterm arg2, Eterm arg3); -static BIF_RETTYPE -binary_matches(Process *p, Eterm arg1, Eterm arg2, Eterm arg3); +binary_match(Process *p, Eterm arg1, Eterm arg2, Eterm arg3, Uint flags); static BIF_RETTYPE binary_split(Process *p, Eterm arg1, Eterm arg2, Eterm arg3); void erts_init_bif_binary(void) { - erts_init_trap_export(&binary_match_trap_export, - am_erlang, am_binary_match_trap, 3, - &binary_match_trap); - - erts_init_trap_export(&binary_matches_trap_export, - am_erlang, am_binary_matches_trap, 3, - &binary_matches_trap); + erts_init_trap_export(&binary_find_trap_export, + am_erlang, am_binary_find_trap, 3, + &binary_find_trap); erts_init_trap_export(&binary_longest_prefix_trap_export, am_erlang, am_binary_longest_prefix_trap, 3, @@ -104,10 +94,6 @@ void erts_init_bif_binary(void) am_erlang, am_binary_copy_trap, 2, &binary_copy_trap); - erts_init_trap_export(&binary_split_trap_export, - am_erlang, am_binary_split_trap, 3, - &binary_split_trap); - max_loop_limit = 0; return; } @@ -322,8 +308,8 @@ static BMData *create_bmdata(MyAllocator *my, byte *x, Uint len, /* * Aho Corasick - Build a Trie and fill in the failure functions * when all strings are added. 
- * The algorithm is nicely described by Dieter Bühler of University of - * Tübingen: + * The algorithm is nicely described by Dieter Bühler of University of + * Tübingen: * http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html */ @@ -573,9 +559,6 @@ static void ac_clean_find_all(ACFindAllState *state) #endif } -#define SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(S) \ - (sizeof(ACFindAllState)+(sizeof(FindallData)*(S).m)) - /* * Differs to the find_first function in that it stores all matches and the values * arte returned only in the state. @@ -853,9 +836,6 @@ static void bm_clean_find_all(BMFindAllState *state) #endif } -#define SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(S) \ - (sizeof(BMFindAllState)+(sizeof(FindallData)*(S).m)) - /* * Differs to the find_first function in that it stores all matches and the * values are returned only in the state. @@ -1038,267 +1018,36 @@ BIF_RETTYPE binary_compile_pattern_1(BIF_ALIST_1) #define DO_BIN_MATCH_BADARG -1 #define DO_BIN_MATCH_RESTART -2 -static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hsend, - Eterm type, Binary *bin, Eterm state_term, - Eterm *res_term) -{ - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - - ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); - } - if (state_term != NIL) { - Eterm *ptr = big_val(state_term); - type = ptr[1]; - } - - if (type == am_bm) { - BMData *bm; - Sint pos; - Eterm ret; - Eterm *hp; - BMFindFirstState state; - Uint reds = get_reds(p, BM_LOOP_FACTOR); - Uint save_reds = reds; - - bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_bm_data(bm); -#endif - if (state_term == NIL) { - bm_init_find_first_match(&state, hsstart, hsend); - } else { - Eterm *ptr = big_val(state_term); - memcpy(&state,ptr+2,sizeof(state)); - } -#ifdef HARDDEBUG - erts_printf("(bm) state->pos = %ld, state->len = 
%lu\n",state.pos, - state.len); -#endif - pos = bm_find_first_match(&state, bm, bytes, &reds); - if (pos == BM_NOT_FOUND) { - ret = am_nomatch; - } else if (pos == BM_RESTART) { - int x = (sizeof(BMFindFirstState) / sizeof(Eterm)) + - !!(sizeof(BMFindFirstState) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap bm!\n"); -#endif - hp = HAlloc(p,x+2); - hp[0] = make_pos_bignum_header(x+1); - hp[1] = type; - memcpy(hp+2,&state,sizeof(state)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - return DO_BIN_MATCH_RESTART; - } else { - Eterm erlen = erts_make_integer((Uint) bm->len, p); - ret = erts_make_integer(pos,p); - hp = HAlloc(p,3); - ret = TUPLE2(hp, ret, erlen); - } - erts_free_aligned_binary_bytes(temp_alloc); - BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; - } else if (type == am_ac) { - ACTrie *act; - Uint pos, rlen; - int acr; - ACFindFirstState state; - Eterm ret; - Eterm *hp; - Uint reds = get_reds(p, AC_LOOP_FACTOR); - Uint save_reds = reds; - - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_ac_trie(act); -#endif - if (state_term == NIL) { - ac_init_find_first_match(&state, act, hsstart, hsend); - } else { - Eterm *ptr = big_val(state_term); - memcpy(&state,ptr+2,sizeof(state)); - } - acr = ac_find_first_match(&state, bytes, &pos, &rlen, &reds); - if (acr == AC_NOT_FOUND) { - ret = am_nomatch; - } else if (acr == AC_RESTART) { - int x = (sizeof(state) / sizeof(Eterm)) + - !!(sizeof(ACFindFirstState) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap ac!\n"); -#endif - hp = HAlloc(p,x+2); - hp[0] = make_pos_bignum_header(x+1); - hp[1] = type; - memcpy(hp+2,&state,sizeof(state)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - return DO_BIN_MATCH_RESTART; - } else { - Eterm epos = erts_make_integer(pos,p); - Eterm erlen = erts_make_integer(rlen,p); - hp = HAlloc(p,3); - ret = TUPLE2(hp, epos, erlen); - } - 
erts_free_aligned_binary_bytes(temp_alloc); - BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; - } - badarg: - return DO_BIN_MATCH_BADARG; -} - -static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, - Uint hsend, Eterm type, Binary *bin, - Eterm state_term, Eterm *res_term) -{ - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - - ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); - } - if (state_term != NIL) { - Eterm *ptr = big_val(state_term); - type = ptr[1]; - } - - if (type == am_bm) { - BMData *bm; - Sint pos; - Eterm ret,tpl; - Eterm *hp; - BMFindAllState state; - Uint reds = get_reds(p, BM_LOOP_FACTOR); - Uint save_reds = reds; - - bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_bm_data(bm); -#endif - if (state_term == NIL) { - bm_init_find_all(&state, hsstart, hsend); - } else { - Eterm *ptr = big_val(state_term); - bm_restore_find_all(&state,(char *) (ptr+2)); - } - - pos = bm_find_all_non_overlapping(&state, bm, bytes, &reds); - if (pos == BM_NOT_FOUND) { - ret = NIL; - } else if (pos == BM_RESTART) { - int x = - (SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + - !!(SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap bm!\n"); -#endif - hp = HAlloc(p,x+2); - hp[0] = make_pos_bignum_header(x+1); - hp[1] = type; - bm_serialize_find_all(&state, (char *) (hp+2)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - bm_clean_find_all(&state); - return DO_BIN_MATCH_RESTART; - } else { - FindallData *fad = state.out; - int i; - for (i = 0; i < state.m; ++i) { - fad[i].epos = erts_make_integer(fad[i].pos,p); - fad[i].elen = erts_make_integer(fad[i].len,p); - } - hp = HAlloc(p,state.m * (3 + 2)); - ret = NIL; - for (i = state.m - 1; i >= 0; --i) { - tpl = 
TUPLE2(hp, fad[i].epos, fad[i].elen); - hp +=3; - ret = CONS(hp,tpl,ret); - hp += 2; - } - } - erts_free_aligned_binary_bytes(temp_alloc); - bm_clean_find_all(&state); - BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; - } else if (type == am_ac) { - ACTrie *act; - int acr; - ACFindAllState state; - Eterm ret,tpl; - Eterm *hp; - Uint reds = get_reds(p, AC_LOOP_FACTOR); - Uint save_reds = reds; +#define BINARY_FIND_ALL 0x01 +#define BINARY_SPLIT_TRIM 0x02 +#define BINARY_SPLIT_TRIM_ALL 0x04 - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_ac_trie(act); -#endif - if (state_term == NIL) { - ac_init_find_all(&state, act, hsstart, hsend); - } else { - Eterm *ptr = big_val(state_term); - ac_restore_find_all(&state,(char *) (ptr+2)); - } - acr = ac_find_all_non_overlapping(&state, bytes, &reds); - if (acr == AC_NOT_FOUND) { - ret = NIL; - } else if (acr == AC_RESTART) { - int x = - (SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + - !!(SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap ac!\n"); -#endif - hp = HAlloc(p,x+2); - hp[0] = make_pos_bignum_header(x+1); - hp[1] = type; - ac_serialize_find_all(&state, (char *) (hp+2)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - ac_clean_find_all(&state); - return DO_BIN_MATCH_RESTART; - } else { - FindallData *fad = state.out; - int i; - for (i = 0; i < state.m; ++i) { - fad[i].epos = erts_make_integer(fad[i].pos,p); - fad[i].elen = erts_make_integer(fad[i].len,p); - } - hp = HAlloc(p,state.m * (3 + 2)); - ret = NIL; - for (i = state.m - 1; i >= 0; --i) { - tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); - hp +=3; - ret = CONS(hp,tpl,ret); - hp += 2; - } - } - erts_free_aligned_binary_bytes(temp_alloc); - ac_clean_find_all(&state); - BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; - } - badarg: - return DO_BIN_MATCH_BADARG; -} 
+typedef struct BinaryFindState { + Eterm type; + Uint flags; + Uint hsstart; + Uint hsend; + Eterm (*not_found_result) (Process *, Eterm, struct BinaryFindState *); + Eterm (*single_result) (Process *, Eterm, struct BinaryFindState *, Sint, Sint); + Eterm (*global_result) (Process *, Eterm, struct BinaryFindState *, FindallData *, Uint); +} BinaryFindState; + +#define SIZEOF_BINARY_FIND_STATE(S) \ + (sizeof(BinaryFindState)+sizeof(S)) + +#define SIZEOF_BINARY_FIND_ALL_STATE(S) \ + (sizeof(BinaryFindState)+sizeof(S)+(sizeof(FindallData)*(S).m)) + +static Eterm do_match_not_found_result(Process *p, Eterm subject, BinaryFindState *bfs); +static Eterm do_match_single_result(Process *p, Eterm subject, BinaryFindState *bfs, + Sint pos, Sint len); +static Eterm do_match_global_result(Process *p, Eterm subject, BinaryFindState *bfs, + FindallData *fad, Uint fad_sz); +static Eterm do_split_not_found_result(Process *p, Eterm subject, BinaryFindState *bfs); +static Eterm do_split_single_result(Process *p, Eterm subject, BinaryFindState *bfs, + Sint pos, Sint len); +static Eterm do_split_global_result(Process *p, Eterm subject, BinaryFindState *bfs, + FindallData *fad, Uint fad_sz); static int parse_match_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp) { @@ -1363,116 +1112,298 @@ static int parse_match_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp) } } -static BIF_RETTYPE binary_match_trap(BIF_ALIST_3) -{ - int runres; - Eterm result; - Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; - runres = do_binary_match(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result); - if (runres == DO_BIN_MATCH_OK) { - BIF_RET(result); - } else { - BUMP_ALL_REDS(BIF_P); - BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, - BIF_ARG_3); - } -} - -static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3) +static int parse_split_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp, Uint *optp) { - int runres; - Eterm result; - Binary *bin = ((ProcBin *) 
binary_val(BIF_ARG_3))->val; - runres = do_binary_matches(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result); - if (runres == DO_BIN_MATCH_OK) { - BIF_RET(result); + Eterm *tp; + Uint pos; + Sint len; + *optp = 0; + *posp = 0; + *endp = binary_size(bin); + if (l == THE_NON_VALUE || l == NIL) { + return 0; + } else if (is_list(l)) { + while(is_list(l)) { + Eterm t = CAR(list_val(l)); + Uint orig_size; + if (is_atom(t)) { + if (t == am_global) { + *optp |= BINARY_FIND_ALL; + l = CDR(list_val(l)); + continue; + } + if (t == am_trim) { + *optp |= BINARY_SPLIT_TRIM; + l = CDR(list_val(l)); + continue; + } + if (t == am_trim_all) { + *optp |= BINARY_SPLIT_TRIM_ALL; + l = CDR(list_val(l)); + continue; + } + } + if (!is_tuple(t)) { + goto badarg; + } + tp = tuple_val(t); + if (arityval(*tp) != 2) { + goto badarg; + } + if (tp[1] != am_scope || is_not_tuple(tp[2])) { + goto badarg; + } + tp = tuple_val(tp[2]); + if (arityval(*tp) != 2) { + goto badarg; + } + if (!term_to_Uint(tp[1], &pos)) { + goto badarg; + } + if (!term_to_Sint(tp[2], &len)) { + goto badarg; + } + if (len < 0) { + Uint lentmp = -(Uint)len; + /* overflow */ + if ((Sint)lentmp < 0) { + goto badarg; + } + len = lentmp; + pos -= len; + } + /* overflow */ + if ((pos + len) < pos || (len > 0 && (pos + len) == pos)) { + goto badarg; + } + *endp = len + pos; + *posp = pos; + if ((orig_size = binary_size(bin)) < pos || + orig_size < (*endp)) { + goto badarg; + } + l = CDR(list_val(l)); + } + return 0; } else { - BUMP_ALL_REDS(BIF_P); - BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, - BIF_ARG_3); + badarg: + return 1; } } -BIF_RETTYPE binary_match_3(BIF_ALIST_3) -{ - return binary_match(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3); -} - -static BIF_RETTYPE -binary_match(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) +static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binary *bin, + Eterm state_term, Eterm *res_term) { - Uint hsstart; - Uint hsend; - Eterm *tp; - Eterm type; - 
Binary *bin; - Eterm bin_term = NIL; - int runres; - Eterm result; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + char *state_ptr = NULL; - if (is_not_binary(arg1)) { - goto badarg; - } - if (parse_match_opts_list(arg3,arg1,&hsstart,&hsend)) { - goto badarg; - } - if (hsend == 0) { - BIF_RET(am_nomatch); - } - if (is_tuple(arg2)) { - tp = tuple_val(arg2); - if (arityval(*tp) != 2 || is_not_atom(tp[1])) { - goto badarg; - } - if (((tp[1] != am_bm) && (tp[1] != am_ac)) || - !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - goto badarg; - } - type = tp[1]; - bin = ((ProcBin *) binary_val(tp[2]))->val; - if (type == am_bm && - ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_bm) { - goto badarg; - } - if (type == am_ac && - ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_ac) { - goto badarg; - } - bin_term = tp[2]; - } else if (do_binary_match_compile(arg2,&type,&bin)) { + ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); + if (bitsize != 0) { goto badarg; } - runres = do_binary_match(p,arg1,hsstart,hsend,type,bin,NIL,&result); - if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { - Eterm *hp = HAlloc(p, PROC_BIN_SIZE); - bin_term = erts_mk_magic_binary_term(&hp, &MSO(p), bin); - } else if (bin_term == NIL) { - erts_bin_free(bin); + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); } - switch (runres) { - case DO_BIN_MATCH_OK: - BIF_RET(result); - case DO_BIN_MATCH_RESTART: - BUMP_ALL_REDS(p); - BIF_TRAP3(&binary_match_trap_export, p, arg1, result, bin_term); - default: - goto badarg; + if (state_term != NIL) { + state_ptr = (char *)(big_val(state_term)); + state_ptr += sizeof(Eterm); + bfs = (BinaryFindState *)(state_ptr); + state_ptr += sizeof(BinaryFindState); } - badarg: - BIF_ERROR(p,BADARG); -} -BIF_RETTYPE binary_matches_3(BIF_ALIST_3) -{ - return binary_matches(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3); + if (bfs->flags & BINARY_FIND_ALL) { + if (bfs->type == am_bm) { + BMData *bm; + Sint pos; + Eterm 
*hp; + BMFindAllState state; + Uint reds = get_reds(p, BM_LOOP_FACTOR); + Uint save_reds = reds; + + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_bm_data(bm); +#endif + if (state_term == NIL) { + bm_init_find_all(&state, bfs->hsstart, bfs->hsend); + } else { + bm_restore_find_all(&state, state_ptr); + } + + pos = bm_find_all_non_overlapping(&state, bm, bytes, &reds); + if (pos == BM_NOT_FOUND) { + *res_term = bfs->not_found_result(p, subject, bfs); + } else if (pos == BM_RESTART) { + int x = + (SIZEOF_BINARY_FIND_ALL_STATE(state) / sizeof(Eterm)) + + !!(SIZEOF_BINARY_FIND_ALL_STATE(state) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap bm!\n"); +#endif + hp = HAlloc(p, x+1); + hp[0] = make_pos_bignum_header(x); + state_ptr = (char *)(hp); + memcpy((void *)(state_ptr+sizeof(Eterm)), bfs, sizeof(BinaryFindState)); + bm_serialize_find_all(&state, state_ptr+sizeof(Eterm)+sizeof(BinaryFindState)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + bm_clean_find_all(&state); + return DO_BIN_MATCH_RESTART; + } else { + *res_term = bfs->global_result(p, subject, bfs, state.out, state.m); + } + erts_free_aligned_binary_bytes(temp_alloc); + bm_clean_find_all(&state); + BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); + return DO_BIN_MATCH_OK; + } else if (bfs->type == am_ac) { + ACTrie *act; + int acr; + ACFindAllState state; + Eterm *hp; + Uint reds = get_reds(p, AC_LOOP_FACTOR); + Uint save_reds = reds; + + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_ac_trie(act); +#endif + if (state_term == NIL) { + ac_init_find_all(&state, act, bfs->hsstart, bfs->hsend); + } else { + ac_restore_find_all(&state, state_ptr); + } + acr = ac_find_all_non_overlapping(&state, bytes, &reds); + if (acr == AC_NOT_FOUND) { + *res_term = bfs->not_found_result(p, subject, bfs); + } else if (acr == AC_RESTART) { + int x = + (SIZEOF_BINARY_FIND_ALL_STATE(state) / sizeof(Eterm)) + + !!(SIZEOF_BINARY_FIND_ALL_STATE(state) % 
sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap ac!\n"); +#endif + hp = HAlloc(p, x+1); + hp[0] = make_pos_bignum_header(x); + state_ptr = (char *)(hp); + memcpy((void *)(state_ptr+sizeof(Eterm)), bfs, sizeof(BinaryFindState)); + ac_serialize_find_all(&state, state_ptr+sizeof(Eterm)+sizeof(BinaryFindState)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + ac_clean_find_all(&state); + return DO_BIN_MATCH_RESTART; + } else { + *res_term = bfs->global_result(p, subject, bfs, state.out, state.m); + } + erts_free_aligned_binary_bytes(temp_alloc); + ac_clean_find_all(&state); + BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); + return DO_BIN_MATCH_OK; + } + } else { + if (bfs->type == am_bm) { + BMData *bm; + Sint pos; + Eterm *hp; + BMFindFirstState state; + Uint reds = get_reds(p, BM_LOOP_FACTOR); + Uint save_reds = reds; + + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_bm_data(bm); +#endif + if (state_term == NIL) { + bm_init_find_first_match(&state, bfs->hsstart, bfs->hsend); + } else { + memcpy((void *)(&state), (const void *)(state_ptr), sizeof(BMFindFirstState)); + } + +#ifdef HARDDEBUG + erts_printf("(bm) state->pos = %ld, state->len = %lu\n",state.pos, + state.len); +#endif + pos = bm_find_first_match(&state, bm, bytes, &reds); + if (pos == BM_NOT_FOUND) { + *res_term = bfs->not_found_result(p, subject, bfs); + } else if (pos == BM_RESTART) { + int x = + (SIZEOF_BINARY_FIND_STATE(state) / sizeof(Eterm)) + + !!(SIZEOF_BINARY_FIND_STATE(state) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap bm!\n"); +#endif + hp = HAlloc(p, x+1); + hp[0] = make_pos_bignum_header(x); + state_ptr = (char *)(hp); + memcpy((void *)(state_ptr+sizeof(Eterm)), bfs, sizeof(BinaryFindState)); + memcpy((void *)(state_ptr+sizeof(Eterm)+sizeof(BinaryFindState)), + (const void *)(&state), sizeof(BMFindFirstState)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + return DO_BIN_MATCH_RESTART; + } else { 
+ *res_term = bfs->single_result(p, subject, bfs, pos, bm->len); + } + erts_free_aligned_binary_bytes(temp_alloc); + BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); + return DO_BIN_MATCH_OK; + } else if (bfs->type == am_ac) { + ACTrie *act; + Uint pos, rlen; + int acr; + ACFindFirstState state; + Eterm *hp; + Uint reds = get_reds(p, AC_LOOP_FACTOR); + Uint save_reds = reds; + + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_ac_trie(act); +#endif + if (state_term == NIL) { + ac_init_find_first_match(&state, act, bfs->hsstart, bfs->hsend); + } else { + memcpy((void *)(&state), (const void *)(state_ptr), sizeof(ACFindFirstState)); + } + acr = ac_find_first_match(&state, bytes, &pos, &rlen, &reds); + if (acr == AC_NOT_FOUND) { + *res_term = bfs->not_found_result(p, subject, bfs); + } else if (acr == AC_RESTART) { + int x = + (SIZEOF_BINARY_FIND_STATE(state) / sizeof(Eterm)) + + !!(SIZEOF_BINARY_FIND_STATE(state) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap ac!\n"); +#endif + hp = HAlloc(p, x+1); + hp[0] = make_pos_bignum_header(x); + state_ptr = (char *)(hp); + memcpy((void *)(state_ptr+sizeof(Eterm)), bfs, sizeof(BinaryFindState)); + memcpy((void *)(state_ptr+sizeof(Eterm)+sizeof(BinaryFindState)), + (const void *)(&state), sizeof(ACFindFirstState)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + return DO_BIN_MATCH_RESTART; + } else { + *res_term = bfs->single_result(p, subject, bfs, pos, rlen); + } + erts_free_aligned_binary_bytes(temp_alloc); + BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); + return DO_BIN_MATCH_OK; + } + } + badarg: + return DO_BIN_MATCH_BADARG; } static BIF_RETTYPE -binary_matches(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) +binary_match(Process *p, Eterm arg1, Eterm arg2, Eterm arg3, Uint flags) { - Uint hsstart, hsend; + BinaryFindState bfs; Eterm *tp; - Eterm type; Binary *bin; Eterm bin_term = NIL; int runres; @@ -1481,11 +1412,12 @@ binary_matches(Process *p, Eterm arg1, 
Eterm arg2, Eterm arg3) if (is_not_binary(arg1)) { goto badarg; } - if (parse_match_opts_list(arg3,arg1,&hsstart,&hsend)) { + bfs.flags = flags; + if (parse_match_opts_list(arg3, arg1, &(bfs.hsstart), &(bfs.hsend))) { goto badarg; } - if (hsend == 0) { - BIF_RET(NIL); + if (bfs.hsend == 0) { + BIF_RET(do_match_not_found_result(p, arg1, &bfs)); } if (is_tuple(arg2)) { tp = tuple_val(arg2); @@ -1496,22 +1428,24 @@ binary_matches(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { goto badarg; } - type = tp[1]; + bfs.type = tp[1]; bin = ((ProcBin *) binary_val(tp[2]))->val; - if (type == am_bm && + if (bfs.type == am_bm && ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_bm) { goto badarg; } - if (type == am_ac && + if (bfs.type == am_ac && ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_ac) { goto badarg; } bin_term = tp[2]; - } else if (do_binary_match_compile(arg2,&type,&bin)) { + } else if (do_binary_match_compile(arg2, &(bfs.type), &bin)) { goto badarg; } - runres = do_binary_matches(p,arg1,hsstart,hsend,type,bin, - NIL,&result); + bfs.not_found_result = &do_match_not_found_result; + bfs.single_result = &do_match_single_result; + bfs.global_result = &do_match_global_result; + runres = do_binary_find(p, arg1, &bfs, bin, NIL, &result); if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { Eterm *hp = HAlloc(p, PROC_BIN_SIZE); bin_term = erts_mk_magic_binary_term(&hp, &MSO(p), bin); @@ -1523,8 +1457,7 @@ binary_matches(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) BIF_RET(result); case DO_BIN_MATCH_RESTART: BUMP_ALL_REDS(p); - BIF_TRAP3(&binary_matches_trap_export, p, arg1, result, - bin_term); + BIF_TRAP3(&binary_find_trap_export, p, arg1, result, bin_term); default: goto badarg; } @@ -1532,98 +1465,392 @@ binary_matches(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) BIF_ERROR(p,BADARG); } - BIF_RETTYPE binary_match_2(BIF_ALIST_2) { - return binary_match(BIF_P, BIF_ARG_1, BIF_ARG_2, THE_NON_VALUE); + return 
binary_match(BIF_P, BIF_ARG_1, BIF_ARG_2, THE_NON_VALUE, 0); } +BIF_RETTYPE binary_match_3(BIF_ALIST_3) +{ + return binary_match(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3, 0); +} BIF_RETTYPE binary_matches_2(BIF_ALIST_2) { - return binary_matches(BIF_P, BIF_ARG_1, BIF_ARG_2, THE_NON_VALUE); + return binary_match(BIF_P, BIF_ARG_1, BIF_ARG_2, THE_NON_VALUE, BINARY_FIND_ALL); } +BIF_RETTYPE binary_matches_3(BIF_ALIST_3) +{ + return binary_match(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3, BINARY_FIND_ALL); +} -BIF_RETTYPE erts_binary_part(Process *p, Eterm binary, Eterm epos, Eterm elen) +static BIF_RETTYPE +binary_split(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) { - Uint pos; - Sint len; - size_t orig_size; - Eterm orig; - Uint offset; - Uint bit_offset; - Uint bit_size; - Eterm* hp; - ErlSubBin* sb; + BinaryFindState bfs; + Eterm *tp; + Binary *bin; + Eterm bin_term = NIL; + int runres; + Eterm result; - if (is_not_binary(binary)) { + if (is_not_binary(arg1)) { goto badarg; } - if (!term_to_Uint(epos, &pos)) { + if (parse_split_opts_list(arg3, arg1, &(bfs.hsstart), &(bfs.hsend), &(bfs.flags))) { goto badarg; } - if (!term_to_Sint(elen, &len)) { - goto badarg; + if (bfs.hsend == 0) { + result = do_split_not_found_result(p, arg1, &bfs); + BIF_RET(result); } - if (len < 0) { - Uint lentmp = -(Uint)len; - /* overflow */ - if ((Sint)lentmp < 0) { + if (is_tuple(arg2)) { + tp = tuple_val(arg2); + if (arityval(*tp) != 2 || is_not_atom(tp[1])) { goto badarg; } - len = lentmp; - if (len > pos) { + if (((tp[1] != am_bm) && (tp[1] != am_ac)) || + !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { goto badarg; } - pos -= len; - } - /* overflow */ - if ((pos + len) < pos || (len > 0 && (pos + len) == pos)){ + bfs.type = tp[1]; + bin = ((ProcBin *) binary_val(tp[2]))->val; + if (bfs.type == am_bm && + ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_bm) { + goto badarg; + } + if (bfs.type == am_ac && + ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_ac) { + goto badarg; + } + bin_term = tp[2]; 
+ } else if (do_binary_match_compile(arg2, &(bfs.type), &bin)) { goto badarg; } - if ((orig_size = binary_size(binary)) < pos || - orig_size < (pos + len)) { + bfs.not_found_result = &do_split_not_found_result; + bfs.single_result = &do_split_single_result; + bfs.global_result = &do_split_global_result; + runres = do_binary_find(p, arg1, &bfs, bin, NIL, &result); + if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { + Eterm *hp = HAlloc(p, PROC_BIN_SIZE); + bin_term = erts_mk_magic_binary_term(&hp, &MSO(p), bin); + } else if (bin_term == NIL) { + erts_bin_free(bin); + } + switch(runres) { + case DO_BIN_MATCH_OK: + BIF_RET(result); + case DO_BIN_MATCH_RESTART: + BIF_TRAP3(&binary_find_trap_export, p, arg1, result, bin_term); + default: goto badarg; } + badarg: + BIF_ERROR(p, BADARG); +} +BIF_RETTYPE binary_split_2(BIF_ALIST_2) +{ + return binary_split(BIF_P, BIF_ARG_1, BIF_ARG_2, THE_NON_VALUE); +} +BIF_RETTYPE binary_split_3(BIF_ALIST_3) +{ + return binary_split(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3); +} - hp = HAlloc(p, ERL_SUB_BIN_SIZE); +static Eterm do_match_not_found_result(Process *p, Eterm subject, BinaryFindState *bfs) +{ + if (bfs->flags & BINARY_FIND_ALL) { + return NIL; + } else { + return am_nomatch; + } +} - ERTS_GET_REAL_BIN(binary, orig, offset, bit_offset, bit_size); - sb = (ErlSubBin *) hp; - sb->thing_word = HEADER_SUB_BIN; - sb->size = len; - sb->offs = offset + pos; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = 0; - sb->is_writable = 0; +static Eterm do_match_single_result(Process *p, Eterm subject, BinaryFindState *bfs, + Sint pos, Sint len) +{ + Eterm erlen; + Eterm *hp; + Eterm ret; - BIF_RET(make_binary(sb)); + erlen = erts_make_integer((Uint)(len), p); + ret = erts_make_integer(pos, p); + hp = HAlloc(p, 3); + ret = TUPLE2(hp, ret, erlen); - badarg: - BIF_ERROR(p, BADARG); + return ret; } -#define ERTS_NEED_GC(p, need) ((HEAP_LIMIT((p)) - HEAP_TOP((p))) <= (need)) - -BIF_RETTYPE erts_gc_binary_part(Process *p, Eterm 
*reg, Eterm live, int range_is_tuple) +static Eterm do_match_global_result(Process *p, Eterm subject, BinaryFindState *bfs, + FindallData *fad, Uint fad_sz) { - Uint pos; - Sint len; - size_t orig_size; - Eterm orig; - Uint offset; - Uint bit_offset; - Uint bit_size; - Eterm* hp; - ErlSubBin* sb; - Eterm binary; - Eterm *tp; - Eterm epos, elen; - int extra_args; + Sint i; + Eterm tpl; + Eterm *hp; + Eterm ret; + + for (i = 0; i < fad_sz; ++i) { + fad[i].epos = erts_make_integer(fad[i].pos, p); + fad[i].elen = erts_make_integer(fad[i].len, p); + } + hp = HAlloc(p, fad_sz * (3 + 2)); + ret = NIL; + for (i = fad_sz - 1; i >= 0; --i) { + tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); + hp += 3; + ret = CONS(hp, tpl, ret); + hp += 2; + } + + return ret; +} + +static Eterm do_split_not_found_result(Process *p, Eterm subject, BinaryFindState *bfs) +{ + Eterm *hp; + Eterm ret; + + hp = HAlloc(p, 2); + ret = CONS(hp, subject, NIL); + + return ret; +} + +static Eterm do_split_single_result(Process *p, Eterm subject, BinaryFindState *bfs, + Sint pos, Sint len) +{ + size_t orig_size; + Eterm orig; + Uint offset; + Uint bit_offset; + Uint bit_size; + ErlSubBin *sb1; + ErlSubBin *sb2; + Eterm *hp; + Eterm ret; + + orig_size = binary_size(subject); + + if ((bfs->flags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL)) && + (orig_size - pos - len) == 0) { + if (pos == 0) { + ret = NIL; + } else { + hp = HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb1 = (ErlSubBin *) hp; + sb1->thing_word = HEADER_SUB_BIN; + sb1->size = pos; + sb1->offs = offset; + sb1->orig = orig; + sb1->bitoffs = bit_offset; + sb1->bitsize = bit_size; + sb1->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + + ret = CONS(hp, make_binary(sb1), NIL); + hp += 2; + } + } else { + if ((bfs->flags & BINARY_SPLIT_TRIM_ALL) && (pos == 0)) { + hp = HAlloc(p, 1 * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb1 = NULL; + } else { 
+ hp = HAlloc(p, 2 * (ERL_SUB_BIN_SIZE + 2)); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + sb1 = (ErlSubBin *) hp; + sb1->thing_word = HEADER_SUB_BIN; + sb1->size = pos; + sb1->offs = offset; + sb1->orig = orig; + sb1->bitoffs = bit_offset; + sb1->bitsize = 0; + sb1->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + } + + sb2 = (ErlSubBin *) hp; + sb2->thing_word = HEADER_SUB_BIN; + sb2->size = orig_size - pos - len; + sb2->offs = offset + pos + len; + sb2->orig = orig; + sb2->bitoffs = bit_offset; + sb2->bitsize = bit_size; + sb2->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + + ret = CONS(hp, make_binary(sb2), NIL); + hp += 2; + if (sb1 != NULL) { + ret = CONS(hp, make_binary(sb1), ret); + hp += 2; + } + } + return ret; +} + +static Eterm do_split_global_result(Process *p, Eterm subject, BinaryFindState *bfs, + FindallData *fad, Uint fad_sz) +{ + size_t orig_size; + Eterm orig; + Uint offset; + Uint bit_offset; + Uint bit_size; + ErlSubBin *sb; + Sint i; + Sint tail; + Uint list_size; + Uint end_pos; + Uint do_trim = bfs->flags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL); + Eterm *hp; + Eterm *hendp; + Eterm ret; + + tail = fad_sz - 1; + list_size = fad_sz + 1; + orig_size = binary_size(subject); + end_pos = (Uint)(orig_size); + + hp = HAlloc(p, list_size * (ERL_SUB_BIN_SIZE + 2)); + hendp = hp + list_size * (ERL_SUB_BIN_SIZE + 2); + ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); + ASSERT(bit_size == 0); + + ret = NIL; + + for (i = tail; i >= 0; --i) { + sb = (ErlSubBin *)(hp); + sb->size = end_pos - (fad[i].pos + fad[i].len); + if (!(sb->size == 0 && do_trim)) { + sb->thing_word = HEADER_SUB_BIN; + sb->offs = offset + fad[i].pos + fad[i].len; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = 0; + sb->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + ret = CONS(hp, make_binary(sb), ret); + hp += 2; + do_trim &= ~BINARY_SPLIT_TRIM; + } + end_pos = fad[i].pos; + } + + sb = (ErlSubBin *)(hp); + sb->size = fad[0].pos; + if 
(!(sb->size == 0 && do_trim)) { + sb->thing_word = HEADER_SUB_BIN; + sb->offs = offset; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = 0; + sb->is_writable = 0; + hp += ERL_SUB_BIN_SIZE; + ret = CONS(hp, make_binary(sb), ret); + hp += 2; + } + HRelease(p, hendp, hp); + return ret; +} + +static BIF_RETTYPE binary_find_trap(BIF_ALIST_3) +{ + int runres; + Eterm result; + Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; + runres = do_binary_find(BIF_P, BIF_ARG_1, THE_NON_VALUE, bin, BIF_ARG_2, &result); + if (runres == DO_BIN_MATCH_OK) { + BIF_RET(result); + } else { + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&binary_find_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3); + } +} + +BIF_RETTYPE erts_binary_part(Process *p, Eterm binary, Eterm epos, Eterm elen) +{ + Uint pos; + Sint len; + size_t orig_size; + Eterm orig; + Uint offset; + Uint bit_offset; + Uint bit_size; + Eterm* hp; + ErlSubBin* sb; + + if (is_not_binary(binary)) { + goto badarg; + } + if (!term_to_Uint(epos, &pos)) { + goto badarg; + } + if (!term_to_Sint(elen, &len)) { + goto badarg; + } + if (len < 0) { + Uint lentmp = -(Uint)len; + /* overflow */ + if ((Sint)lentmp < 0) { + goto badarg; + } + len = lentmp; + if (len > pos) { + goto badarg; + } + pos -= len; + } + /* overflow */ + if ((pos + len) < pos || (len > 0 && (pos + len) == pos)){ + goto badarg; + } + if ((orig_size = binary_size(binary)) < pos || + orig_size < (pos + len)) { + goto badarg; + } + + + + hp = HAlloc(p, ERL_SUB_BIN_SIZE); + + ERTS_GET_REAL_BIN(binary, orig, offset, bit_offset, bit_size); + sb = (ErlSubBin *) hp; + sb->thing_word = HEADER_SUB_BIN; + sb->size = len; + sb->offs = offset + pos; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = 0; + sb->is_writable = 0; + + BIF_RET(make_binary(sb)); + + badarg: + BIF_ERROR(p, BADARG); +} + +#define ERTS_NEED_GC(p, need) ((HEAP_LIMIT((p)) - HEAP_TOP((p))) <= (need)) + +BIF_RETTYPE erts_gc_binary_part(Process *p, Eterm *reg, Eterm live, int range_is_tuple) 
+{ + Uint pos; + Sint len; + size_t orig_size; + Eterm orig; + Uint offset; + Uint bit_offset; + Uint bit_size; + Eterm* hp; + ErlSubBin* sb; + Eterm binary; + Eterm *tp; + Eterm epos, elen; + int extra_args; if (range_is_tuple) { @@ -2542,538 +2769,6 @@ BIF_RETTYPE binary_copy_2(BIF_ALIST_2) return do_binary_copy(BIF_P,BIF_ARG_1,BIF_ARG_2); } -static Eterm do_split_single_result(Process*, Eterm subject, - Sint pos, Sint len, Uint hsflags); -static Eterm do_split_global_result(Process*, FindallData *fad, Uint fad_sz, - Eterm subject, Uint hsflags); - -#define BINARY_SPLIT_GLOBAL 0x01 -#define BINARY_SPLIT_TRIM 0x02 -#define BINARY_SPLIT_TRIM_ALL 0x04 - -static int do_binary_split(Process *p, Eterm subject, Uint hsstart, - Uint hsend, Uint hsflags, Eterm type, Binary *bin, - Eterm state_term, Eterm *res_term) -{ - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - - ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); - } - if (state_term != NIL) { - Eterm *ptr = big_val(state_term); - type = ptr[1]; - hsflags = (Uint)(ptr[2]); - } - - if (hsflags & BINARY_SPLIT_GLOBAL) { - if (type == am_bm) { - BMData *bm; - Sint pos; - Eterm *hp; - BMFindAllState state; - Uint reds = get_reds(p, BM_LOOP_FACTOR); - Uint save_reds = reds; - - bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_bm_data(bm); -#endif - if (state_term == NIL) { - bm_init_find_all(&state, hsstart, hsend); - } else { - Eterm *ptr = big_val(state_term); - bm_restore_find_all(&state, (char *)(ptr+3)); - } - - pos = bm_find_all_non_overlapping(&state, bm, bytes, &reds); - if (pos == BM_NOT_FOUND) { - hp = HAlloc(p, 2); - *res_term = CONS(hp, subject, NIL); - } else if (pos == BM_RESTART) { - int x = - (SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + - !!(SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); -#ifdef HARDDEBUG - 
erts_printf("Trap bm!\n"); -#endif - hp = HAlloc(p, x+3); - hp[0] = make_pos_bignum_header(x+2); - hp[1] = type; - hp[2] = (Eterm)(hsflags); - bm_serialize_find_all(&state, (char *)(hp+3)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - bm_clean_find_all(&state); - return DO_BIN_MATCH_RESTART; - } else { - *res_term = do_split_global_result(p, state.out, state.m, subject, hsflags); - } - erts_free_aligned_binary_bytes(temp_alloc); - bm_clean_find_all(&state); - BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); - return DO_BIN_MATCH_OK; - } else if (type == am_ac) { - ACTrie *act; - int acr; - ACFindAllState state; - Eterm *hp; - Uint reds = get_reds(p, AC_LOOP_FACTOR); - Uint save_reds = reds; - - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_ac_trie(act); -#endif - if (state_term == NIL) { - ac_init_find_all(&state, act, hsstart, hsend); - } else { - Eterm *ptr = big_val(state_term); - ac_restore_find_all(&state, (char *)(ptr+3)); - } - acr = ac_find_all_non_overlapping(&state, bytes, &reds); - if (acr == AC_NOT_FOUND) { - hp = HAlloc(p, 2); - *res_term = CONS(hp, subject, NIL); - } else if (acr == AC_RESTART) { - int x = (SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + - !!(SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap ac!\n"); -#endif - hp = HAlloc(p, x+3); - hp[0] = make_pos_bignum_header(x+2); - hp[1] = type; - hp[2] = (Eterm)(hsflags); - ac_serialize_find_all(&state, (char *)(hp+3)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - ac_clean_find_all(&state); - return DO_BIN_MATCH_RESTART; - } else { - *res_term = do_split_global_result(p, state.out, state.m, subject, hsflags); - } - erts_free_aligned_binary_bytes(temp_alloc); - ac_clean_find_all(&state); - BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); - return DO_BIN_MATCH_OK; - } - } else { - if (type == am_bm) { - BMData *bm; - Sint pos; - Eterm *hp; - 
BMFindFirstState state; - Uint reds = get_reds(p, BM_LOOP_FACTOR); - Uint save_reds = reds; - - bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_bm_data(bm); -#endif - if (state_term == NIL) { - bm_init_find_first_match(&state, hsstart, hsend); - } else { - Eterm *ptr = big_val(state_term); - memcpy((void *)(&state), (const void *)(ptr+3), sizeof(BMFindFirstState)); - } - -#ifdef HARDDEBUG - erts_printf("(bm) state->pos = %ld, state->len = %lu\n",state.pos, - state.len); -#endif - pos = bm_find_first_match(&state, bm, bytes, &reds); - if (pos == BM_NOT_FOUND) { - hp = HAlloc(p, 2); - *res_term = CONS(hp, subject, NIL); - } else if (pos == BM_RESTART) { - int x = - (sizeof(state) / sizeof(Eterm)) + - !!(sizeof(state) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap bm!\n"); -#endif - hp = HAlloc(p, x+3); - hp[0] = make_pos_bignum_header(x+2); - hp[1] = type; - hp[2] = (Eterm)(hsflags); - memcpy((void *)(hp+3), (const void *)(&state), sizeof(state)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - return DO_BIN_MATCH_RESTART; - } else { - *res_term = do_split_single_result(p, subject, pos, bm->len, hsflags); - } - erts_free_aligned_binary_bytes(temp_alloc); - BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); - return DO_BIN_MATCH_OK; - } else if (type == am_ac) { - ACTrie *act; - Uint pos, rlen; - int acr; - ACFindFirstState state; - Eterm *hp; - Uint reds = get_reds(p, AC_LOOP_FACTOR); - Uint save_reds = reds; - - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_ac_trie(act); -#endif - if (state_term == NIL) { - ac_init_find_first_match(&state, act, hsstart, hsend); - } else { - Eterm *ptr = big_val(state_term); - memcpy((void *)(&state), (const void *)(ptr+3), sizeof(ACFindFirstState)); - } - acr = ac_find_first_match(&state, bytes, &pos, &rlen, &reds); - if (acr == AC_NOT_FOUND) { - hp = HAlloc(p, 2); - *res_term = CONS(hp, subject, NIL); - } else if (acr == AC_RESTART) { - int x = - 
(sizeof(state) / sizeof(Eterm)) + - !!(sizeof(state) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap ac!\n"); -#endif - hp = HAlloc(p, x+3); - hp[0] = make_pos_bignum_header(x+2); - hp[1] = type; - hp[2] = (Eterm)(hsflags); - memcpy((void *)(hp+3), (const void *)(&state), sizeof(state)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - return DO_BIN_MATCH_RESTART; - } else { - *res_term = do_split_single_result(p, subject, pos, rlen, hsflags); - } - erts_free_aligned_binary_bytes(temp_alloc); - BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); - return DO_BIN_MATCH_OK; - } - } - badarg: - return DO_BIN_MATCH_BADARG; -} - -static Eterm do_split_single_result(Process* p, Eterm subject, - Sint pos, Sint len, Uint hsflags) -{ - size_t orig_size; - Eterm orig; - Uint offset; - Uint bit_offset; - Uint bit_size; - ErlSubBin *sb1; - ErlSubBin *sb2; - Eterm* hp; - Eterm ret; - - orig_size = binary_size(subject); - - if ((hsflags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL)) && (orig_size - pos - len) == 0) { - if (pos != 0) { - ret = NIL; - } else { - hp = HAlloc(p, (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - sb1 = (ErlSubBin *) hp; - sb1->thing_word = HEADER_SUB_BIN; - sb1->size = pos; - sb1->offs = offset; - sb1->orig = orig; - sb1->bitoffs = bit_offset; - sb1->bitsize = bit_size; - sb1->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - - ret = CONS(hp, make_binary(sb1), NIL); - hp += 2; - } - } else { - if ((hsflags & BINARY_SPLIT_TRIM_ALL) && (pos == 0)) { - hp = HAlloc(p, 1 * (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - sb1 = NULL; - } else { - hp = HAlloc(p, 2 * (ERL_SUB_BIN_SIZE + 2)); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - sb1 = (ErlSubBin *) hp; - sb1->thing_word = HEADER_SUB_BIN; - sb1->size = pos; - sb1->offs = offset; - sb1->orig = orig; - sb1->bitoffs = bit_offset; - sb1->bitsize = 0; - sb1->is_writable = 
0; - hp += ERL_SUB_BIN_SIZE; - } - - sb2 = (ErlSubBin *) hp; - sb2->thing_word = HEADER_SUB_BIN; - sb2->size = orig_size - pos - len; - sb2->offs = offset + pos + len; - sb2->orig = orig; - sb2->bitoffs = bit_offset; - sb2->bitsize = bit_size; - sb2->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - - ret = CONS(hp, make_binary(sb2), NIL); - hp += 2; - if (sb1 != NULL) { - ret = CONS(hp, make_binary(sb1), ret); - hp += 2; - } - } - return ret; -} - -static Eterm do_split_global_result(Process* p, FindallData *fad, Uint fad_sz, - Uint subject, Uint hsflags) -{ - size_t orig_size; - Eterm orig; - Uint offset; - Uint bit_offset; - Uint bit_size; - ErlSubBin *sb; - Sint i; - Sint tail; - Uint list_size; - Uint end_pos; - Uint do_trim = hsflags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL); - Eterm* hp; - Eterm* hendp; - Eterm ret; - - tail = fad_sz - 1; - list_size = fad_sz + 1; - orig_size = binary_size(subject); - end_pos = (Uint)(orig_size); - - hp = HAlloc(p, list_size * (ERL_SUB_BIN_SIZE + 2)); - hendp = hp + list_size * (ERL_SUB_BIN_SIZE + 2); - ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); - ASSERT(bit_size == 0); - - ret = NIL; - - for (i = tail; i >= 0; --i) { - sb = (ErlSubBin *)(hp); - sb->size = end_pos - (fad[i].pos + fad[i].len); - if (!(sb->size == 0 && do_trim)) { - sb->thing_word = HEADER_SUB_BIN; - sb->offs = offset + fad[i].pos + fad[i].len; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = 0; - sb->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - ret = CONS(hp, make_binary(sb), ret); - hp += 2; - do_trim &= ~BINARY_SPLIT_TRIM; - } - end_pos = fad[i].pos; - } - - sb = (ErlSubBin *)(hp); - sb->size = fad[0].pos; - if (!(sb->size == 0 && do_trim)) { - sb->thing_word = HEADER_SUB_BIN; - sb->offs = offset; - sb->orig = orig; - sb->bitoffs = bit_offset; - sb->bitsize = 0; - sb->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - ret = CONS(hp, make_binary(sb), ret); - hp += 2; - } - HRelease(p, hendp, hp); - return ret; -} - -static int 
parse_split_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp, Uint *optp) -{ - Eterm *tp; - Uint pos; - Sint len; - *optp = 0; - *posp = 0; - *endp = binary_size(bin); - if (l == THE_NON_VALUE || l == NIL) { - return 0; - } else if (is_list(l)) { - while(is_list(l)) { - Eterm t = CAR(list_val(l)); - Uint orig_size; - if (is_atom(t)) { - if (t == am_global) { - *optp |= BINARY_SPLIT_GLOBAL; - l = CDR(list_val(l)); - continue; - } - if (t == am_trim) { - *optp |= BINARY_SPLIT_TRIM; - l = CDR(list_val(l)); - continue; - } - if (t == am_trim_all) { - *optp |= BINARY_SPLIT_TRIM_ALL; - l = CDR(list_val(l)); - continue; - } - } - if (!is_tuple(t)) { - goto badarg; - } - tp = tuple_val(t); - if (arityval(*tp) != 2) { - goto badarg; - } - if (tp[1] != am_scope || is_not_tuple(tp[2])) { - goto badarg; - } - tp = tuple_val(tp[2]); - if (arityval(*tp) != 2) { - goto badarg; - } - if (!term_to_Uint(tp[1], &pos)) { - goto badarg; - } - if (!term_to_Sint(tp[2], &len)) { - goto badarg; - } - if (len < 0) { - Uint lentmp = -(Uint)len; - /* overflow */ - if ((Sint)lentmp < 0) { - goto badarg; - } - len = lentmp; - pos -= len; - } - /* overflow */ - if ((pos + len) < pos || (len > 0 && (pos + len) == pos)) { - goto badarg; - } - *endp = len + pos; - *posp = pos; - if ((orig_size = binary_size(bin)) < pos || - orig_size < (*endp)) { - goto badarg; - } - l = CDR(list_val(l)); - } - return 0; - } else { - badarg: - return 1; - } -} - -static BIF_RETTYPE binary_split_trap(BIF_ALIST_3) -{ - int runres; - Eterm result; - Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; - runres = do_binary_split(BIF_P,BIF_ARG_1,0,0,0,NIL,bin,BIF_ARG_2,&result); - if (runres == DO_BIN_MATCH_OK) { - BIF_RET(result); - } else { - BUMP_ALL_REDS(BIF_P); - BIF_TRAP3(&binary_split_trap_export, BIF_P, BIF_ARG_1, result, - BIF_ARG_3); - } -} - -BIF_RETTYPE binary_split_3(BIF_ALIST_3) -{ - return binary_split(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3); -} - -static BIF_RETTYPE -binary_split(Process *p, Eterm 
arg1, Eterm arg2, Eterm arg3) -{ - Uint hsflags; - Uint hsstart; - Uint hsend; - Eterm *tp; - Eterm type; - Binary *bin; - Eterm bin_term = NIL; - int runres; - Eterm result; - - if (is_not_binary(arg1)) { - goto badarg; - } - if (parse_split_opts_list(arg3, arg1, &hsstart, &hsend, &hsflags)) { - goto badarg; - } - if (hsend == 0) { - tp = HAlloc(p, 2); - result = NIL; - result = CONS(tp, arg1, result); - BIF_RET(result); - } - if (is_tuple(arg2)) { - tp = tuple_val(arg2); - if (arityval(*tp) != 2 || is_not_atom(tp[1])) { - goto badarg; - } - if (((tp[1] != am_bm) && (tp[1] != am_ac)) || - !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - goto badarg; - } - type = tp[1]; - bin = ((ProcBin *) binary_val(tp[2]))->val; - if (type == am_bm && - ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_bm) { - goto badarg; - } - if (type == am_ac && - ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_ac) { - goto badarg; - } - bin_term = tp[2]; - } else if (do_binary_match_compile(arg2, &type, &bin)) { - goto badarg; - } - runres = do_binary_split(p, arg1, hsstart, hsend, hsflags, type, bin, NIL, &result); - if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { - Eterm *hp = HAlloc(p, PROC_BIN_SIZE); - bin_term = erts_mk_magic_binary_term(&hp, &MSO(p), bin); - } else if (bin_term == NIL) { - erts_bin_free(bin); - } - switch(runres) { - case DO_BIN_MATCH_OK: - BIF_RET(result); - case DO_BIN_MATCH_RESTART: - BIF_TRAP3(&binary_split_trap_export, p, arg1, result, bin_term); - default: - goto badarg; - } - badarg: - BIF_ERROR(p,BADARG); -} - - -BIF_RETTYPE binary_split_2(BIF_ALIST_2) -{ - return binary_split(BIF_P, BIF_ARG_1, BIF_ARG_2, THE_NON_VALUE); -} - - BIF_RETTYPE binary_referenced_byte_size_1(BIF_ALIST_1) { ErlSubBin *sb; -- cgit v1.2.3 From 8067992954f9b882ffd7332ee669a254c000f1b2 Mon Sep 17 00:00:00 2001 From: Andrew Bennett Date: Fri, 18 Sep 2015 09:38:46 -0600 Subject: erts: Minor refactor for binary find BIF backend * Use NULL instead of THE_NON_VALUE for non-Eterm variable. 
* Add BinaryFindState_bignum struct to avoid unnecessary type casting. --- erts/emulator/beam/erl_bif_binary.c | 109 ++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 54 deletions(-) diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index 9f72b8c0ac..b1ebf0327e 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -531,21 +531,23 @@ static void ac_init_find_all(ACFindAllState *state, ACTrie *act, Sint startpos, state->out = NULL; } -static void ac_restore_find_all(ACFindAllState *state, char *buff) +static void ac_restore_find_all(ACFindAllState *state, + const ACFindAllState *src) { - memcpy(state,buff,sizeof(ACFindAllState)); + memcpy(state, src, sizeof(ACFindAllState)); if (state->allocated > 0) { state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated)); - memcpy(state->out,buff+sizeof(ACFindAllState),sizeof(FindallData)*state->m); + memcpy(state->out, src+1, sizeof(FindallData)*state->m); } else { state->out = NULL; } } -static void ac_serialize_find_all(ACFindAllState *state, char *buff) +static void ac_serialize_find_all(const ACFindAllState *state, + ACFindAllState *dst) { - memcpy(buff,state,sizeof(ACFindAllState)); - memcpy(buff+sizeof(ACFindAllState),state->out,sizeof(FindallData)*state->m); + memcpy(dst, state, sizeof(ACFindAllState)); + memcpy(dst+1, state->out, sizeof(FindallData)*state->m); } static void ac_clean_find_all(ACFindAllState *state) @@ -805,24 +807,24 @@ static void bm_init_find_all(BMFindAllState *state, Sint startpos, Uint len) state->out = NULL; } -static void bm_restore_find_all(BMFindAllState *state, char *buff) +static void bm_restore_find_all(BMFindAllState *state, + const BMFindAllState *src) { - memcpy(state,buff,sizeof(BMFindAllState)); + memcpy(state, src, sizeof(BMFindAllState)); if (state->allocated > 0) { state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated)); - 
memcpy(state->out,buff+sizeof(BMFindAllState), - sizeof(FindallData)*state->m); + memcpy(state->out, src+1, sizeof(FindallData)*state->m); } else { state->out = NULL; } } -static void bm_serialize_find_all(BMFindAllState *state, char *buff) +static void bm_serialize_find_all(const BMFindAllState *state, + BMFindAllState *dst) { - memcpy(buff,state,sizeof(BMFindAllState)); - memcpy(buff+sizeof(BMFindAllState),state->out, - sizeof(FindallData)*state->m); + memcpy(dst, state, sizeof(BMFindAllState)); + memcpy(dst+1, state->out, sizeof(FindallData)*state->m); } static void bm_clean_find_all(BMFindAllState *state) @@ -1032,6 +1034,17 @@ typedef struct BinaryFindState { Eterm (*global_result) (Process *, Eterm, struct BinaryFindState *, FindallData *, Uint); } BinaryFindState; +typedef struct BinaryFindState_bignum { + Eterm bignum_hdr; + BinaryFindState bfs; + union { + BMFindFirstState bmffs; + BMFindAllState bmfas; + ACFindFirstState acffs; + ACFindAllState acfas; + } data; +} BinaryFindState_bignum; + #define SIZEOF_BINARY_FIND_STATE(S) \ (sizeof(BinaryFindState)+sizeof(S)) @@ -1197,7 +1210,7 @@ static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binar byte *bytes; Uint bitoffs, bitsize; byte *temp_alloc = NULL; - char *state_ptr = NULL; + BinaryFindState_bignum *state_ptr = NULL; ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); if (bitsize != 0) { @@ -1207,17 +1220,14 @@ static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binar bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); } if (state_term != NIL) { - state_ptr = (char *)(big_val(state_term)); - state_ptr += sizeof(Eterm); - bfs = (BinaryFindState *)(state_ptr); - state_ptr += sizeof(BinaryFindState); + state_ptr = (BinaryFindState_bignum *)(big_val(state_term)); + bfs = &(state_ptr->bfs); } if (bfs->flags & BINARY_FIND_ALL) { if (bfs->type == am_bm) { BMData *bm; Sint pos; - Eterm *hp; BMFindAllState state; Uint reds = get_reds(p, BM_LOOP_FACTOR); 
Uint save_reds = reds; @@ -1229,7 +1239,7 @@ static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binar if (state_term == NIL) { bm_init_find_all(&state, bfs->hsstart, bfs->hsend); } else { - bm_restore_find_all(&state, state_ptr); + bm_restore_find_all(&state, &(state_ptr->data.bmfas)); } pos = bm_find_all_non_overlapping(&state, bm, bytes, &reds); @@ -1242,12 +1252,11 @@ static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binar #ifdef HARDDEBUG erts_printf("Trap bm!\n"); #endif - hp = HAlloc(p, x+1); - hp[0] = make_pos_bignum_header(x); - state_ptr = (char *)(hp); - memcpy((void *)(state_ptr+sizeof(Eterm)), bfs, sizeof(BinaryFindState)); - bm_serialize_find_all(&state, state_ptr+sizeof(Eterm)+sizeof(BinaryFindState)); - *res_term = make_big(hp); + state_ptr = (BinaryFindState_bignum*) HAlloc(p, x+1); + state_ptr->bignum_hdr = make_pos_bignum_header(x); + memcpy(&state_ptr->bfs, bfs, sizeof(BinaryFindState)); + bm_serialize_find_all(&state, &state_ptr->data.bmfas); + *res_term = make_big(&state_ptr->bignum_hdr); erts_free_aligned_binary_bytes(temp_alloc); bm_clean_find_all(&state); return DO_BIN_MATCH_RESTART; @@ -1262,7 +1271,6 @@ static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binar ACTrie *act; int acr; ACFindAllState state; - Eterm *hp; Uint reds = get_reds(p, AC_LOOP_FACTOR); Uint save_reds = reds; @@ -1273,7 +1281,7 @@ static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binar if (state_term == NIL) { ac_init_find_all(&state, act, bfs->hsstart, bfs->hsend); } else { - ac_restore_find_all(&state, state_ptr); + ac_restore_find_all(&state, &(state_ptr->data.acfas)); } acr = ac_find_all_non_overlapping(&state, bytes, &reds); if (acr == AC_NOT_FOUND) { @@ -1285,12 +1293,11 @@ static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binar #ifdef HARDDEBUG erts_printf("Trap ac!\n"); #endif - hp = HAlloc(p, x+1); - hp[0] = make_pos_bignum_header(x); - 
state_ptr = (char *)(hp); - memcpy((void *)(state_ptr+sizeof(Eterm)), bfs, sizeof(BinaryFindState)); - ac_serialize_find_all(&state, state_ptr+sizeof(Eterm)+sizeof(BinaryFindState)); - *res_term = make_big(hp); + state_ptr = (BinaryFindState_bignum*) HAlloc(p, x+1); + state_ptr->bignum_hdr = make_pos_bignum_header(x); + memcpy(&state_ptr->bfs, bfs, sizeof(BinaryFindState)); + ac_serialize_find_all(&state, &state_ptr->data.acfas); + *res_term = make_big(&state_ptr->bignum_hdr); erts_free_aligned_binary_bytes(temp_alloc); ac_clean_find_all(&state); return DO_BIN_MATCH_RESTART; @@ -1306,7 +1313,6 @@ static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binar if (bfs->type == am_bm) { BMData *bm; Sint pos; - Eterm *hp; BMFindFirstState state; Uint reds = get_reds(p, BM_LOOP_FACTOR); Uint save_reds = reds; @@ -1318,7 +1324,7 @@ static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binar if (state_term == NIL) { bm_init_find_first_match(&state, bfs->hsstart, bfs->hsend); } else { - memcpy((void *)(&state), (const void *)(state_ptr), sizeof(BMFindFirstState)); + memcpy(&state, &state_ptr->data.bmffs, sizeof(BMFindFirstState)); } #ifdef HARDDEBUG @@ -1335,13 +1341,11 @@ static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binar #ifdef HARDDEBUG erts_printf("Trap bm!\n"); #endif - hp = HAlloc(p, x+1); - hp[0] = make_pos_bignum_header(x); - state_ptr = (char *)(hp); - memcpy((void *)(state_ptr+sizeof(Eterm)), bfs, sizeof(BinaryFindState)); - memcpy((void *)(state_ptr+sizeof(Eterm)+sizeof(BinaryFindState)), - (const void *)(&state), sizeof(BMFindFirstState)); - *res_term = make_big(hp); + state_ptr = (BinaryFindState_bignum*) HAlloc(p, x+1); + state_ptr->bignum_hdr = make_pos_bignum_header(x); + memcpy(&state_ptr->bfs, bfs, sizeof(BinaryFindState)); + memcpy(&state_ptr->data.bmffs, &state, sizeof(BMFindFirstState)); + *res_term = make_big(&state_ptr->bignum_hdr); erts_free_aligned_binary_bytes(temp_alloc); return
DO_BIN_MATCH_RESTART; } else { @@ -1355,7 +1359,6 @@ static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binar Uint pos, rlen; int acr; ACFindFirstState state; - Eterm *hp; Uint reds = get_reds(p, AC_LOOP_FACTOR); Uint save_reds = reds; @@ -1366,7 +1369,7 @@ static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binar if (state_term == NIL) { ac_init_find_first_match(&state, act, bfs->hsstart, bfs->hsend); } else { - memcpy((void *)(&state), (const void *)(state_ptr), sizeof(ACFindFirstState)); + memcpy(&state, &state_ptr->data.acffs, sizeof(ACFindFirstState)); } acr = ac_find_first_match(&state, bytes, &pos, &rlen, &reds); if (acr == AC_NOT_FOUND) { @@ -1378,13 +1381,11 @@ static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binar #ifdef HARDDEBUG erts_printf("Trap ac!\n"); #endif - hp = HAlloc(p, x+1); - hp[0] = make_pos_bignum_header(x); - state_ptr = (char *)(hp); - memcpy((void *)(state_ptr+sizeof(Eterm)), bfs, sizeof(BinaryFindState)); - memcpy((void *)(state_ptr+sizeof(Eterm)+sizeof(BinaryFindState)), - (const void *)(&state), sizeof(ACFindFirstState)); - *res_term = make_big(hp); + state_ptr = (BinaryFindState_bignum*) HAlloc(p, x+1); + state_ptr->bignum_hdr = make_pos_bignum_header(x); + memcpy(&state_ptr->bfs, bfs, sizeof(BinaryFindState)); + memcpy(&state_ptr->data.acffs, &state, sizeof(ACFindFirstState)); + *res_term = make_big(&state_ptr->bignum_hdr); erts_free_aligned_binary_bytes(temp_alloc); return DO_BIN_MATCH_RESTART; } else { @@ -1763,7 +1764,7 @@ static BIF_RETTYPE binary_find_trap(BIF_ALIST_3) int runres; Eterm result; Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; - runres = do_binary_find(BIF_P, BIF_ARG_1, THE_NON_VALUE, bin, BIF_ARG_2, &result); + runres = do_binary_find(BIF_P, BIF_ARG_1, NULL, bin, BIF_ARG_2, &result); if (runres == DO_BIN_MATCH_OK) { BIF_RET(result); } else { -- cgit v1.2.3