diff options
author | Lukas Larsson <[email protected]> | 2017-09-13 10:40:52 +0200 |
---|---|---|
committer | Lukas Larsson <[email protected]> | 2017-09-13 10:40:52 +0200 |
commit | edafd99aa0b808b79733dbe6ce3175b593c7b307 (patch) | |
tree | 8c4122299adbbeb7f28f41abc86ec12a5239233b /erts/emulator/beam | |
parent | 15ab3815f8e4d810ad5fccbfa63c59bde45fd37e (diff) | |
parent | 964354ba3b44b0e3a8b8341e63bc43ce24de2c02 (diff) | |
download | otp-edafd99aa0b808b79733dbe6ce3175b593c7b307.tar.gz otp-edafd99aa0b808b79733dbe6ce3175b593c7b307.tar.bz2 otp-edafd99aa0b808b79733dbe6ce3175b593c7b307.zip |
Merge branch 'potatosalad/erts/binary_find_bif_improved/PR-1480/OTP-14610'
* potatosalad/erts/binary_find_bif_improved/PR-1480/OTP-14610:
stdlib: Improved BIF for binary matches and split.
Diffstat (limited to 'erts/emulator/beam')
-rw-r--r-- | erts/emulator/beam/erl_alloc.types | 1 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bif_binary.c | 1108 |
2 files changed, 602 insertions, 507 deletions
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index 8142ea8893..11884299e2 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -258,6 +258,7 @@ type MREF_ENT STANDARD SYSTEM magic_ref_entry type MREF_TAB_BKTS STANDARD SYSTEM magic_ref_table_buckets type MREF_TAB LONG_LIVED SYSTEM magic_ref_table type MINDIRECTION FIXED_SIZE SYSTEM magic_indirection +type BINARY_FIND SHORT_LIVED PROCESSES binary_find type THR_Q_EL STANDARD SYSTEM thr_q_element type THR_Q_EL_SL FIXED_SIZE SYSTEM sl_thr_q_element diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index dcffde5777..4cafa499a9 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -171,6 +171,16 @@ static void *my_alloc(MyAllocator *my, Uint size) #define ALPHABET_SIZE 256 +typedef struct _findall_data { + Uint pos; + Uint len; +#ifdef HARDDEBUG + Uint id; +#endif + Eterm epos; + Eterm elen; +} FindallData; + typedef struct _ac_node { #ifdef HARDDEBUG Uint32 id; /* To identify h pointer targets when @@ -208,6 +218,103 @@ typedef struct _bm_data { Sint badshift[ALPHABET_SIZE]; } BMData; +typedef struct _ac_find_all_state { + ACNode *q; + Uint pos; + Uint len; + Uint m; + Uint allocated; + FindallData *out; +} ACFindAllState; + +typedef struct _ac_find_first_state { + ACNode *q; + Uint pos; + Uint len; + ACNode *candidate; + Uint candidate_start; +} ACFindFirstState; + +typedef struct _bm_find_all_state { + Sint pos; + Sint len; + Uint m; + Uint allocated; + FindallData *out; +} BMFindAllState; + +typedef struct _bm_find_first_state { + Sint pos; + Sint len; +} BMFindFirstState; + +typedef enum _bf_return { + BF_RESTART = -3, + BF_NOT_FOUND, + BF_BADARG, + BF_OK +} BFReturn; + +typedef struct _binary_find_all_context { + ErtsHeapFactory factory; + Eterm term; + Sint head; + Sint tail; + Uint end_pos; + Uint size; + FindallData *data; + union { + ACFindAllState ac; + BMFindAllState bm; + } d; +} BinaryFindAllContext; + +typedef struct _binary_find_first_context { + Uint pos; + Uint len; + union { + ACFindFirstState ac; + BMFindFirstState bm; + } d; +} BinaryFindFirstContext; + +typedef struct _binary_find_context BinaryFindContext; + +typedef struct _binary_find_search { + void (*init) (BinaryFindContext *); + BFReturn (*find) (BinaryFindContext *, byte *); + void (*done) (BinaryFindContext *); +} BinaryFindSearch; + +typedef Eterm (*BinaryFindResult)(Process *, Eterm, BinaryFindContext **); + +typedef enum _binary_find_state { + BFSearch, + BFResult, + BFDone +} BinaryFindState; + +struct _binary_find_context { + Eterm pat_type; + Eterm pat_term; + Binary *pat_bin; + Uint flags; + Uint hsstart; + Uint hsend; + int loop_factor; + int exported; + Uint reds; + BinaryFindState state; + Eterm trap_term; + BinaryFindSearch *search; + BinaryFindResult not_found; + BinaryFindResult found; + union { + BinaryFindAllContext fa; + BinaryFindFirstContext ff; + } u; +}; + #ifdef HARDDEBUG static void dump_bm_data(BMData *bm); static void dump_ac_trie(ACTrie *act); @@ -414,32 +521,25 @@ static void ac_compute_failure_functions(ACTrie *act, ACNode **qbuff) * Basic AC finds the first end before the first start... * */ -typedef struct { - ACNode *q; - Uint pos; - Uint len; - ACNode *candidate; - Uint candidate_start; -} ACFindFirstState; - - -static void ac_init_find_first_match(ACFindFirstState *state, ACTrie *act, Sint startpos, Uint len) +static void ac_init_find_first_match(BinaryFindContext *ctx) { + ACFindFirstState *state = &(ctx->u.ff.d.ac); + ACTrie *act = ERTS_MAGIC_BIN_DATA(ctx->pat_bin); state->q = act->root; - state->pos = startpos; - state->len = len; + state->pos = ctx->hsstart; + state->len = ctx->hsend; state->candidate = NULL; state->candidate_start = 0; } -#define AC_OK 0 -#define AC_NOT_FOUND -1 -#define AC_RESTART -2 #define AC_LOOP_FACTOR 10 -static int ac_find_first_match(ACFindFirstState *state, byte *haystack, - Uint *mpos, Uint *mlen, Uint *reductions) +static BFReturn ac_find_first_match(BinaryFindContext *ctx, byte *haystack) { + ACFindFirstState *state = &(ctx->u.ff.d.ac); + Uint *mpos = &(ctx->u.ff.pos); + Uint *mlen = &(ctx->u.ff.len); + Uint *reductions = &(ctx->reds); ACNode *q = state->q; Uint i = state->pos; ACNode *candidate = state->candidate, *r; @@ -455,7 +555,7 @@ static int ac_find_first_match(ACFindFirstState *state, byte *haystack, state->len = len; state->candidate = candidate; state->candidate_start = candidate_start; - return AC_RESTART; + return BF_RESTART; } while (q->g[haystack[i]] == NULL && q->h != q) { @@ -485,68 +585,33 @@ static int ac_find_first_match(ACFindFirstState *state, byte *haystack, } *reductions = reds; if (!candidate) { - return AC_NOT_FOUND; + return BF_NOT_FOUND; } #ifdef HARDDEBUG dump_ac_node(candidate,0,'?'); #endif *mpos = candidate_start; *mlen = candidate->d; - return AC_OK; + return BF_OK; } -typedef struct _findall_data { - Uint pos; - Uint len; -#ifdef HARDDEBUG - Uint id; -#endif - Eterm epos; - Eterm elen; -} FindallData; - -typedef struct { - ACNode *q; - Uint pos; - Uint len; - Uint m; - Uint allocated; - FindallData *out; -} ACFindAllState; - -static void ac_init_find_all(ACFindAllState *state, ACTrie *act, Sint startpos, Uint len) +static void ac_init_find_all(BinaryFindContext *ctx) { + ACFindAllState *state = &(ctx->u.fa.d.ac); + ACTrie *act = ERTS_MAGIC_BIN_DATA(ctx->pat_bin); state->q = act->root; - state->pos = startpos; - state->len = len; + state->pos = ctx->hsstart; + state->len = ctx->hsend; state->m = 0; state->allocated = 0; state->out = NULL; } -static void ac_restore_find_all(ACFindAllState *state, - const ACFindAllState *src) -{ - memcpy(state, src, sizeof(ACFindAllState)); - if (state->allocated > 0) { - state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated)); - memcpy(state->out, src+1, sizeof(FindallData)*state->m); - } else { - state->out = NULL; - } -} - -static void ac_serialize_find_all(const ACFindAllState *state, - ACFindAllState *dst) -{ - memcpy(dst, state, sizeof(ACFindAllState)); - memcpy(dst+1, state->out, sizeof(FindallData)*state->m); -} - -static void ac_clean_find_all(ACFindAllState *state) +static void ac_clean_find_all(BinaryFindContext *ctx) { + ACFindAllState *state = &(ctx->u.fa.d.ac); if (state->out != NULL) { - erts_free(ERTS_ALC_T_TMP, state->out); + erts_free(ERTS_ALC_T_BINARY_FIND, state->out); } #ifdef HARDDEBUG state->out = NULL; @@ -558,9 +623,10 @@ static void ac_clean_find_all(ACFindAllState *state) * Differs to the find_first function in that it stores all matches and the values * arte returned only in the state. */ -static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, - Uint *reductions) +static BFReturn ac_find_all_non_overlapping(BinaryFindContext *ctx, byte *haystack) { + ACFindAllState *state = &(ctx->u.fa.d.ac); + Uint *reductions = &(ctx->reds); ACNode *q = state->q; Uint i = state->pos; Uint rstart; @@ -571,7 +637,6 @@ static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, FindallData *out = state->out; register Uint reds = *reductions; - while (i < len) { if (--reds == 0) { state->q = q; @@ -580,7 +645,7 @@ static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, state->m = m; state->allocated = allocated; state->out = out; - return AC_RESTART; + return BF_RESTART; } while (q->g[haystack[i]] == NULL && q->h != q) { q = q->h; @@ -618,11 +683,11 @@ static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, if (m >= allocated) { if (!allocated) { allocated = 10; - out = erts_alloc(ERTS_ALC_T_TMP, + out = erts_alloc(ERTS_ALC_T_BINARY_FIND, sizeof(FindallData) * allocated); } else { allocated *= 2; - out = erts_realloc(ERTS_ALC_T_TMP, out, + out = erts_realloc(ERTS_ALC_T_BINARY_FIND, out, sizeof(FindallData) * allocated); } @@ -649,7 +714,7 @@ static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, *reductions = reds; state->m = m; state->out = out; - return (m == 0) ? AC_NOT_FOUND : AC_OK; + return (m == 0) ? BF_NOT_FOUND : BF_OK; } /* @@ -736,27 +801,22 @@ static void compute_goodshifts(BMData *bmd) erts_free(ERTS_ALC_T_TMP, suffixes); } -typedef struct { - Sint pos; - Sint len; -} BMFindFirstState; - -#define BM_OK 0 /* used only for find_all */ -#define BM_NOT_FOUND -1 -#define BM_RESTART -2 #define BM_LOOP_FACTOR 10 /* Should we have a higher value? */ -static void bm_init_find_first_match(BMFindFirstState *state, Sint startpos, - Uint len) +static void bm_init_find_first_match(BinaryFindContext *ctx) { - state->pos = startpos; - state->len = (Sint) len; + BMFindFirstState *state = &(ctx->u.ff.d.bm); + state->pos = ctx->hsstart; + state->len = ctx->hsend; } - -static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, - byte *haystack, Uint *reductions) +static BFReturn bm_find_first_match(BinaryFindContext *ctx, byte *haystack) { + BMFindFirstState *state = &(ctx->u.ff.d.bm); + BMData *bmd = ERTS_MAGIC_BIN_DATA(ctx->pat_bin); + Uint *mpos = &(ctx->u.ff.pos); + Uint *mlen = &(ctx->u.ff.len); + Uint *reductions = &(ctx->reds); Sint blen = bmd->len; Sint len = state->len; Sint *gs = bmd->goodshift; @@ -769,61 +829,37 @@ static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, while (j <= len - blen) { if (--reds == 0) { state->pos = j; - return BM_RESTART; + return BF_RESTART; } for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) ; if (i < 0) { /* found */ *reductions = reds; - return j; + *mpos = (Uint) j; + *mlen = (Uint) blen; + return BF_OK; } j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); } *reductions = reds; - return BM_NOT_FOUND; + return BF_NOT_FOUND; } -typedef struct { - Sint pos; - Sint len; - Uint m; - Uint allocated; - FindallData *out; -} BMFindAllState; - -static void bm_init_find_all(BMFindAllState *state, Sint startpos, Uint len) +static void bm_init_find_all(BinaryFindContext *ctx) { - state->pos = startpos; - state->len = (Sint) len; + BMFindAllState *state = &(ctx->u.fa.d.bm); + state->pos = ctx->hsstart; + state->len = ctx->hsend; state->m = 0; state->allocated = 0; state->out = NULL; } -static void bm_restore_find_all(BMFindAllState *state, - const BMFindAllState *src) -{ - memcpy(state, src, sizeof(BMFindAllState)); - if (state->allocated > 0) { - state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * - (state->allocated)); - memcpy(state->out, src+1, sizeof(FindallData)*state->m); - } else { - state->out = NULL; - } -} - -static void bm_serialize_find_all(const BMFindAllState *state, - BMFindAllState *dst) -{ - memcpy(dst, state, sizeof(BMFindAllState)); - memcpy(dst+1, state->out, sizeof(FindallData)*state->m); -} - -static void bm_clean_find_all(BMFindAllState *state) +static void bm_clean_find_all(BinaryFindContext *ctx) { + BMFindAllState *state = &(ctx->u.fa.d.bm); if (state->out != NULL) { - erts_free(ERTS_ALC_T_TMP, state->out); + erts_free(ERTS_ALC_T_BINARY_FIND, state->out); } #ifdef HARDDEBUG state->out = NULL; @@ -835,10 +871,11 @@ static void bm_clean_find_all(BMFindAllState *state) * Differs to the find_first function in that it stores all matches and the * values are returned only in the state. */ -static Sint bm_find_all_non_overlapping(BMFindAllState *state, - BMData *bmd, byte *haystack, - Uint *reductions) +static BFReturn bm_find_all_non_overlapping(BinaryFindContext *ctx, byte *haystack) { + BMFindAllState *state = &(ctx->u.fa.d.bm); + BMData *bmd = ERTS_MAGIC_BIN_DATA(ctx->pat_bin); + Uint *reductions = &(ctx->reds); Sint blen = bmd->len; Sint len = state->len; Sint *gs = bmd->goodshift; @@ -857,7 +894,7 @@ static Sint bm_find_all_non_overlapping(BMFindAllState *state, state->m = m; state->allocated = allocated; state->out = out; - return BM_RESTART; + return BF_RESTART; } for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) ; @@ -865,10 +902,11 @@ static Sint bm_find_all_non_overlapping(BMFindAllState *state, if (m >= allocated) { if (!allocated) { allocated = 10; - out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * allocated); + out = erts_alloc(ERTS_ALC_T_BINARY_FIND, + sizeof(FindallData) * allocated); } else { allocated *= 2; - out = erts_realloc(ERTS_ALC_T_TMP, out, + out = erts_realloc(ERTS_ALC_T_BINARY_FIND, out, sizeof(FindallData) * allocated); } } @@ -883,7 +921,7 @@ static Sint bm_find_all_non_overlapping(BMFindAllState *state, state->m = m; state->out = out; *reductions = reds; - return (m == 0) ? BM_NOT_FOUND : BM_OK; + return (m == 0) ? BF_NOT_FOUND : BF_OK; } /* @@ -1009,51 +1047,160 @@ BIF_RETTYPE binary_compile_pattern_1(BIF_ALIST_1) BIF_RET(ret); } -#define DO_BIN_MATCH_OK 0 -#define DO_BIN_MATCH_BADARG -1 -#define DO_BIN_MATCH_RESTART -2 +#define BF_FLAG_GLOBAL 0x01 +#define BF_FLAG_SPLIT_TRIM 0x02 +#define BF_FLAG_SPLIT_TRIM_ALL 0x04 -#define BINARY_FIND_ALL 0x01 -#define BINARY_SPLIT_TRIM 0x02 -#define BINARY_SPLIT_TRIM_ALL 0x04 +static void bf_context_init(BinaryFindContext *ctx, BinaryFindResult not_found, + BinaryFindResult single, BinaryFindResult global, + Binary *pat_bin); +static BinaryFindContext *bf_context_export(Process *p, BinaryFindContext *src); +static int bf_context_destructor(Binary *ctx_bin); +#ifdef HARDDEBUG +static void bf_context_dump(BinaryFindContext *ctx); +#endif -typedef struct BinaryFindState { - Eterm type; - Uint flags; - Uint hsstart; - Uint hsend; - Eterm (*not_found_result) (Process *, Eterm, struct BinaryFindState *); - Eterm (*single_result) (Process *, Eterm, struct BinaryFindState *, Sint, Sint); - Eterm (*global_result) (Process *, Eterm, struct BinaryFindState *, FindallData *, Uint); -} BinaryFindState; +static BinaryFindSearch bf_search_ac_global = { + ac_init_find_all, + ac_find_all_non_overlapping, + ac_clean_find_all +}; + +static BinaryFindSearch bf_search_ac_single = { + ac_init_find_first_match, + ac_find_first_match, + NULL +}; + +static BinaryFindSearch bf_search_bm_global = { + bm_init_find_all, + bm_find_all_non_overlapping, + bm_clean_find_all +}; + +static BinaryFindSearch bf_search_bm_single = { + bm_init_find_first_match, + bm_find_first_match, + NULL +}; + +static void bf_context_init(BinaryFindContext *ctx, BinaryFindResult not_found, + BinaryFindResult single, BinaryFindResult global, + Binary *pat_bin) +{ + ctx->exported = 0; + ctx->state = BFSearch; + ctx->not_found = not_found; + if (ctx->flags & BF_FLAG_GLOBAL) { + ctx->found = global; + if (ctx->pat_type == am_bm) { + ctx->search = &bf_search_bm_global; + ctx->loop_factor = BM_LOOP_FACTOR; + } else if (ctx->pat_type == am_ac) { + ctx->search = &bf_search_ac_global; + ctx->loop_factor = AC_LOOP_FACTOR; + } + } else { + ctx->found = single; + if (ctx->pat_type == am_bm) { + ctx->search = &bf_search_bm_single; + ctx->loop_factor = BM_LOOP_FACTOR; + } else if (ctx->pat_type == am_ac) { + ctx->search = &bf_search_ac_single; + ctx->loop_factor = AC_LOOP_FACTOR; + } + } + ctx->trap_term = THE_NON_VALUE; + ctx->pat_bin = pat_bin; + ctx->search->init(ctx); +} -typedef struct BinaryFindState_bignum { - Eterm bignum_hdr; - BinaryFindState bfs; - union { - BMFindFirstState bmffs; - BMFindAllState bmfas; - ACFindFirstState acffs; - ACFindAllState acfas; - } data; -} BinaryFindState_bignum; - -#define SIZEOF_BINARY_FIND_STATE(S) \ - (sizeof(BinaryFindState)+sizeof(S)) - -#define SIZEOF_BINARY_FIND_ALL_STATE(S) \ - (sizeof(BinaryFindState)+sizeof(S)+(sizeof(FindallData)*(S).m)) - -static Eterm do_match_not_found_result(Process *p, Eterm subject, BinaryFindState *bfs); -static Eterm do_match_single_result(Process *p, Eterm subject, BinaryFindState *bfs, - Sint pos, Sint len); -static Eterm do_match_global_result(Process *p, Eterm subject, BinaryFindState *bfs, - FindallData *fad, Uint fad_sz); -static Eterm do_split_not_found_result(Process *p, Eterm subject, BinaryFindState *bfs); -static Eterm do_split_single_result(Process *p, Eterm subject, BinaryFindState *bfs, - Sint pos, Sint len); -static Eterm do_split_global_result(Process *p, Eterm subject, BinaryFindState *bfs, - FindallData *fad, Uint fad_sz); +static BinaryFindContext *bf_context_export(Process *p, BinaryFindContext *src) +{ + Binary *ctx_bin; + BinaryFindContext *ctx; + Eterm *hp; + + ASSERT(src->exported == 0); + ctx_bin = erts_create_magic_binary(sizeof(BinaryFindContext), + bf_context_destructor); + ctx = ERTS_MAGIC_BIN_DATA(ctx_bin); + sys_memcpy(ctx, src, sizeof(BinaryFindContext)); + if (ctx->pat_bin != NULL && ctx->pat_term == THE_NON_VALUE) { + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE * 2); + ctx->pat_term = erts_mk_magic_ref(&hp, &MSO(p), ctx->pat_bin); + } else { + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); + } + ctx->trap_term = erts_mk_magic_ref(&hp, &MSO(p), ctx_bin); + ctx->exported = 1; + return ctx; +} + +static int bf_context_destructor(Binary *ctx_bin) +{ + BinaryFindContext *ctx; + + ctx = ERTS_MAGIC_BIN_DATA(ctx_bin); + if (ctx->state != BFDone) { + if (ctx->search->done != NULL) { + ctx->search->done(ctx); + } + ctx->state = BFDone; + } + return 1; +} + +#ifdef HARDDEBUG +static void bf_context_dump(BinaryFindContext *ctx) +{ + if (ctx->pat_type == am_bm) { + BMData *bm; + bm = ERTS_MAGIC_BIN_DATA(ctx->pat_bin); + dump_bm_data(bm); + } else { + ACTrie *act; + act = ERTS_MAGIC_BIN_DATA(ctx->pat_bin); + dump_ac_trie(act); + } +} +#endif + +static Eterm do_match_not_found_result(Process *p, Eterm subject, BinaryFindContext **ctxp); +static Eterm do_match_single_result(Process *p, Eterm subject, BinaryFindContext **ctxp); +static Eterm do_match_global_result(Process *p, Eterm subject, BinaryFindContext **ctxp); +static Eterm do_split_not_found_result(Process *p, Eterm subject, BinaryFindContext **ctxp); +static Eterm do_split_single_result(Process *p, Eterm subject, BinaryFindContext **ctxp); +static Eterm do_split_global_result(Process *p, Eterm subject, BinaryFindContext **ctxp); + +static BFReturn maybe_binary_match_compile(BinaryFindContext *ctx, Eterm arg, Binary **pat_bin) +{ + Eterm *tp; + ctx->pat_term = THE_NON_VALUE; + if (is_tuple(arg)) { + tp = tuple_val(arg); + if (arityval(*tp) != 2 || is_not_atom(tp[1])) { + return BF_BADARG; + } + if (((tp[1] != am_bm) && (tp[1] != am_ac)) || + !is_internal_magic_ref(tp[2])) { + return BF_BADARG; + } + *pat_bin = erts_magic_ref2bin(tp[2]); + if ((tp[1] == am_bm && + ERTS_MAGIC_BIN_DESTRUCTOR(*pat_bin) != cleanup_my_data_bm) || + (tp[1] == am_ac && + ERTS_MAGIC_BIN_DESTRUCTOR(*pat_bin) != cleanup_my_data_ac)) { + *pat_bin = NULL; + return BF_BADARG; + } + ctx->pat_type = tp[1]; + ctx->pat_term = tp[2]; + } else if (do_binary_match_compile(arg, &(ctx->pat_type), pat_bin) != 0) { + return BF_BADARG; + } + return BF_OK; +} static int parse_match_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp) { @@ -1134,17 +1281,17 @@ static int parse_split_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp, Uin Uint orig_size; if (is_atom(t)) { if (t == am_global) { - *optp |= BINARY_FIND_ALL; + *optp |= BF_FLAG_GLOBAL; l = CDR(list_val(l)); continue; } if (t == am_trim) { - *optp |= BINARY_SPLIT_TRIM; + *optp |= BF_FLAG_SPLIT_TRIM; l = CDR(list_val(l)); continue; } if (t == am_trim_all) { - *optp |= BINARY_SPLIT_TRIM_ALL; + *optp |= BF_FLAG_SPLIT_TRIM_ALL; l = CDR(list_val(l)); continue; } @@ -1197,266 +1344,160 @@ static int parse_split_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp, Uin } } -static int do_binary_find(Process *p, Eterm subject, BinaryFindState *bfs, Binary *bin, - Eterm state_term, Eterm *res_term) +static BFReturn do_binary_find(Process *p, Eterm subject, BinaryFindContext **ctxp, + Binary *pat_bin, Binary *ctx_bin, Eterm *res_term) { - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - BinaryFindState_bignum *state_ptr = NULL; + BinaryFindContext *ctx; + int is_first_call; + Uint initial_reds; + BFReturn runres; - ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); - } - if (state_term != NIL) { - state_ptr = (BinaryFindState_bignum *)(big_val(state_term)); - bfs = &(state_ptr->bfs); + if (ctx_bin == NULL) { + is_first_call = 1; + ctx = *ctxp; + } else { + is_first_call = 0; + ctx = ERTS_MAGIC_BIN_DATA(ctx_bin); + ctx->pat_bin = pat_bin; + *ctxp = ctx; } - if (bfs->flags & BINARY_FIND_ALL) { - if (bfs->type == am_bm) { - BMData *bm; - Sint pos; - BMFindAllState state; - Uint reds = get_reds(p, BM_LOOP_FACTOR); - Uint save_reds = reds; + initial_reds = ctx->reds = get_reds(p, ctx->loop_factor); - bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_bm_data(bm); -#endif - if (state_term == NIL) { - bm_init_find_all(&state, bfs->hsstart, bfs->hsend); - } else { - bm_restore_find_all(&state, &(state_ptr->data.bmfas)); - } + switch (ctx->state) { + case BFSearch: { + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; - pos = bm_find_all_non_overlapping(&state, bm, bytes, &reds); - if (pos == BM_NOT_FOUND) { - *res_term = bfs->not_found_result(p, subject, bfs); - } else if (pos == BM_RESTART) { - int x = - (SIZEOF_BINARY_FIND_ALL_STATE(state) / sizeof(Eterm)) + - !!(SIZEOF_BINARY_FIND_ALL_STATE(state) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap bm!\n"); -#endif - state_ptr = (BinaryFindState_bignum*) HAlloc(p, x+1); - state_ptr->bignum_hdr = make_pos_bignum_header(x); - memcpy(&state_ptr->bfs, bfs, sizeof(BinaryFindState)); - bm_serialize_find_all(&state, &state_ptr->data.bmfas); - *res_term = make_big(&state_ptr->bignum_hdr); - erts_free_aligned_binary_bytes(temp_alloc); - bm_clean_find_all(&state); - return DO_BIN_MATCH_RESTART; - } else { - *res_term = bfs->global_result(p, subject, bfs, state.out, state.m); - } - erts_free_aligned_binary_bytes(temp_alloc); - bm_clean_find_all(&state); - BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); - return DO_BIN_MATCH_OK; - } else if (bfs->type == am_ac) { - ACTrie *act; - int acr; - ACFindAllState state; - Uint reds = get_reds(p, AC_LOOP_FACTOR); - Uint save_reds = reds; - - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); + ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); + } #ifdef HARDDEBUG - dump_ac_trie(act); + bf_context_dump(ctx); #endif - if (state_term == NIL) { - ac_init_find_all(&state, act, bfs->hsstart, bfs->hsend); - } else { - ac_restore_find_all(&state, &(state_ptr->data.acfas)); - } - acr = ac_find_all_non_overlapping(&state, bytes, &reds); - if (acr == AC_NOT_FOUND) { - *res_term = bfs->not_found_result(p, subject, bfs); - } else if (acr == AC_RESTART) { - int x = - (SIZEOF_BINARY_FIND_ALL_STATE(state) / sizeof(Eterm)) + - !!(SIZEOF_BINARY_FIND_ALL_STATE(state) % sizeof(Eterm)); + runres = ctx->search->find(ctx, bytes); + if (runres == BF_NOT_FOUND) { + *res_term = ctx->not_found(p, subject, &ctx); + *ctxp = ctx; + } else if (runres == BF_RESTART) { #ifdef HARDDEBUG + if (ctx->pat_type == am_ac) { erts_printf("Trap ac!\n"); -#endif - state_ptr = (BinaryFindState_bignum*) HAlloc(p, x+1); - state_ptr->bignum_hdr = make_pos_bignum_header(x); - memcpy(&state_ptr->bfs, bfs, sizeof(BinaryFindState)); - ac_serialize_find_all(&state, &state_ptr->data.acfas); - *res_term = make_big(&state_ptr->bignum_hdr); - erts_free_aligned_binary_bytes(temp_alloc); - ac_clean_find_all(&state); - return DO_BIN_MATCH_RESTART; - } else { - *res_term = bfs->global_result(p, subject, bfs, state.out, state.m); - } - erts_free_aligned_binary_bytes(temp_alloc); - ac_clean_find_all(&state); - BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); - return DO_BIN_MATCH_OK; - } - } else { - if (bfs->type == am_bm) { - BMData *bm; - Sint pos; - BMFindFirstState state; - Uint reds = get_reds(p, BM_LOOP_FACTOR); - Uint save_reds = reds; - - bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_bm_data(bm); -#endif - if (state_term == NIL) { - bm_init_find_first_match(&state, bfs->hsstart, bfs->hsend); } else { - memcpy(&state, &state_ptr->data.bmffs, sizeof(BMFindFirstState)); - } - -#ifdef HARDDEBUG - erts_printf("(bm) state->pos = %ld, state->len = %lu\n",state.pos, - state.len); -#endif - pos = bm_find_first_match(&state, bm, bytes, &reds); - if (pos == BM_NOT_FOUND) { - *res_term = bfs->not_found_result(p, subject, bfs); - } else if (pos == BM_RESTART) { - int x = - (SIZEOF_BINARY_FIND_STATE(state) / sizeof(Eterm)) + - !!(SIZEOF_BINARY_FIND_STATE(state) % sizeof(Eterm)); -#ifdef HARDDEBUG erts_printf("Trap bm!\n"); + } #endif - state_ptr = (BinaryFindState_bignum*) HAlloc(p, x+1); - state_ptr->bignum_hdr = make_pos_bignum_header(x); - memcpy(&state_ptr->bfs, bfs, sizeof(BinaryFindState)); - memcpy(&state_ptr->data.acffs, &state, sizeof(BMFindFirstState)); - *res_term = make_big(&state_ptr->bignum_hdr); - erts_free_aligned_binary_bytes(temp_alloc); - return DO_BIN_MATCH_RESTART; - } else { - *res_term = bfs->single_result(p, subject, bfs, pos, bm->len); + if (is_first_call) { + ctx = bf_context_export(p, ctx); + *ctxp = ctx; + erts_set_gc_state(p, 0); } erts_free_aligned_binary_bytes(temp_alloc); - BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); - return DO_BIN_MATCH_OK; - } else if (bfs->type == am_ac) { - ACTrie *act; - Uint pos, rlen; - int acr; - ACFindFirstState state; - Uint reds = get_reds(p, AC_LOOP_FACTOR); - Uint save_reds = reds; - - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_ac_trie(act); -#endif - if (state_term == NIL) { - ac_init_find_first_match(&state, act, bfs->hsstart, bfs->hsend); - } else { - memcpy(&state, &state_ptr->data.acffs, sizeof(ACFindFirstState)); + *res_term = THE_NON_VALUE; + BUMP_ALL_REDS(p); + return BF_RESTART; + } else { + *res_term = ctx->found(p, subject, &ctx); + *ctxp = ctx; + } + erts_free_aligned_binary_bytes(temp_alloc); + if (*res_term == THE_NON_VALUE) { + if (is_first_call) { + erts_set_gc_state(p, 0); } - acr = ac_find_first_match(&state, bytes, &pos, &rlen, &reds); - if (acr == AC_NOT_FOUND) { - *res_term = bfs->not_found_result(p, subject, bfs); - } else if (acr == AC_RESTART) { - int x = - (SIZEOF_BINARY_FIND_STATE(state) / sizeof(Eterm)) + - !!(SIZEOF_BINARY_FIND_STATE(state) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap ac!\n"); -#endif - state_ptr = (BinaryFindState_bignum*) HAlloc(p, x+1); - state_ptr->bignum_hdr = make_pos_bignum_header(x); - memcpy(&state_ptr->bfs, bfs, sizeof(BinaryFindState)); - memcpy(&state_ptr->data.acffs, &state, sizeof(ACFindFirstState)); - *res_term = make_big(&state_ptr->bignum_hdr); - erts_free_aligned_binary_bytes(temp_alloc); - return DO_BIN_MATCH_RESTART; - } else { - *res_term = bfs->single_result(p, subject, bfs, pos, rlen); + BUMP_ALL_REDS(p); + return BF_RESTART; + } + if (ctx->search->done != NULL) { + ctx->search->done(ctx); + } + ctx->state = BFDone; + if (!is_first_call) { + erts_set_gc_state(p, 1); + } + BUMP_REDS(p, (initial_reds - ctx->reds) / ctx->loop_factor); + return BF_OK; + } + case BFResult: { + *res_term = ctx->found(p, subject, &ctx); + *ctxp = ctx; + if (*res_term == THE_NON_VALUE) { + if (is_first_call) { + erts_set_gc_state(p, 0); } - erts_free_aligned_binary_bytes(temp_alloc); - BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); - return DO_BIN_MATCH_OK; + BUMP_ALL_REDS(p); + return BF_RESTART; + } + if (ctx->search->done != NULL) { + ctx->search->done(ctx); } + ctx->state = BFDone; + if (!is_first_call) { + erts_set_gc_state(p, 1); + } + BUMP_REDS(p, (initial_reds - ctx->reds) / ctx->loop_factor); + return BF_OK; } - badarg: - return DO_BIN_MATCH_BADARG; + default: + ASSERT(!"Unknown state in do_binary_find"); + } + +badarg: + if (!is_first_call) { + if (ctx->search->done != NULL) { + ctx->search->done(ctx); + } + ctx->state = BFDone; + erts_set_gc_state(p, 1); + } + return BF_BADARG; } static BIF_RETTYPE binary_match(Process *p, Eterm arg1, Eterm arg2, Eterm arg3, Uint flags) { - BinaryFindState bfs; - Eterm *tp; - Binary *bin; - Eterm bin_term = NIL; + BinaryFindContext c_buff; + BinaryFindContext *ctx = &c_buff; + Binary *pat_bin; int runres; Eterm result; - if (is_not_binary(arg1)) { + if (is_not_binary(arg1) || binary_bitsize(arg1) != 0) { goto badarg; } - bfs.flags = flags; - if (parse_match_opts_list(arg3, arg1, &(bfs.hsstart), &(bfs.hsend))) { + ctx->flags = flags; + if (parse_match_opts_list(arg3, arg1, &(ctx->hsstart), &(ctx->hsend))) { goto badarg; } - if (bfs.hsend == 0) { - BIF_RET(do_match_not_found_result(p, arg1, &bfs)); + if (ctx->hsend == 0) { + result = do_match_not_found_result(p, arg1, &ctx); + BIF_RET(result); } - if (is_tuple(arg2)) { - tp = tuple_val(arg2); - if (arityval(*tp) != 2 || is_not_atom(tp[1])) { - goto badarg; - } - if (((tp[1] != am_bm) && (tp[1] != am_ac)) || - !is_internal_magic_ref(tp[2])) { - goto badarg; - } - bfs.type = tp[1]; - bin = erts_magic_ref2bin(tp[2]); - if (bfs.type == am_bm && - ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_bm) { - goto badarg; - } - if (bfs.type == am_ac && - ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_ac) { - goto badarg; - } - bin_term = tp[2]; - } else if (do_binary_match_compile(arg2, &(bfs.type), &bin)) { + if (maybe_binary_match_compile(ctx, arg2, &pat_bin) != BF_OK) { goto badarg; } - bfs.not_found_result = &do_match_not_found_result; - bfs.single_result = &do_match_single_result; - bfs.global_result = &do_match_global_result; - runres = do_binary_find(p, arg1, &bfs, bin, NIL, &result); - if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { - Eterm *hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); - bin_term = erts_mk_magic_ref(&hp, &MSO(p), bin); - } else if (bin_term == NIL) { - erts_bin_free(bin); + bf_context_init(ctx, do_match_not_found_result, do_match_single_result, + do_match_global_result, pat_bin); + runres = do_binary_find(p, arg1, &ctx, pat_bin, NULL, &result); + if (runres == BF_OK && ctx->pat_term == THE_NON_VALUE) { + erts_bin_free(pat_bin); } switch (runres) { - case DO_BIN_MATCH_OK: + case BF_OK: BIF_RET(result); - case DO_BIN_MATCH_RESTART: - BUMP_ALL_REDS(p); - BIF_TRAP3(&binary_find_trap_export, p, arg1, result, bin_term); + case BF_RESTART: + ASSERT(result == THE_NON_VALUE && ctx->trap_term != result && ctx->pat_term != result); + BIF_TRAP3(&binary_find_trap_export, p, arg1, ctx->trap_term, ctx->pat_term); default: goto badarg; } - badarg: - BIF_ERROR(p,BADARG); +badarg: + BIF_ERROR(p, BADARG); } BIF_RETTYPE binary_match_2(BIF_ALIST_2) @@ -1471,76 +1512,52 @@ BIF_RETTYPE binary_match_3(BIF_ALIST_3) BIF_RETTYPE binary_matches_2(BIF_ALIST_2) { - return binary_match(BIF_P, BIF_ARG_1, BIF_ARG_2, THE_NON_VALUE, BINARY_FIND_ALL); + return binary_match(BIF_P, BIF_ARG_1, BIF_ARG_2, THE_NON_VALUE, BF_FLAG_GLOBAL); } BIF_RETTYPE binary_matches_3(BIF_ALIST_3) { - return binary_match(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3, BINARY_FIND_ALL); + return binary_match(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3, BF_FLAG_GLOBAL); } static BIF_RETTYPE binary_split(Process *p, Eterm arg1, Eterm arg2, Eterm arg3) { - BinaryFindState bfs; - Eterm *tp; - Binary *bin; - Eterm bin_term = NIL; + BinaryFindContext c_buff; + BinaryFindContext *ctx = &c_buff; + Binary *pat_bin; int runres; Eterm result; - if (is_not_binary(arg1)) { + if (is_not_binary(arg1) || binary_bitsize(arg1) != 0) { goto badarg; } - if (parse_split_opts_list(arg3, arg1, &(bfs.hsstart), &(bfs.hsend), &(bfs.flags))) { + if (parse_split_opts_list(arg3, arg1, &(ctx->hsstart), &(ctx->hsend), &(ctx->flags))) { goto badarg; } - if (bfs.hsend == 0) { - result = do_split_not_found_result(p, arg1, &bfs); + if (ctx->hsend == 0) { + result = do_split_not_found_result(p, arg1, &ctx); BIF_RET(result); } - if (is_tuple(arg2)) { - tp = tuple_val(arg2); - if (arityval(*tp) != 2 || is_not_atom(tp[1])) { - goto badarg; - } - if (((tp[1] != am_bm) && (tp[1] != am_ac)) || - !is_internal_magic_ref(tp[2])) { - goto badarg; - } - bfs.type = tp[1]; - bin = erts_magic_ref2bin(tp[2]); - if (bfs.type == am_bm && - ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_bm) { - goto badarg; - } - if (bfs.type == am_ac && - ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_ac) { - goto badarg; - } - bin_term = tp[2]; - } else if (do_binary_match_compile(arg2, &(bfs.type), &bin)) { + if (maybe_binary_match_compile(ctx, arg2, &pat_bin) != BF_OK) { goto badarg; } - bfs.not_found_result = &do_split_not_found_result; - bfs.single_result = &do_split_single_result; - bfs.global_result = &do_split_global_result; - runres = do_binary_find(p, arg1, &bfs, bin, NIL, &result); - if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { - Eterm *hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); - bin_term = erts_mk_magic_ref(&hp, &MSO(p), bin); - } else if (bin_term == NIL) { - erts_bin_free(bin); - } - switch(runres) { - case DO_BIN_MATCH_OK: + bf_context_init(ctx, do_split_not_found_result, do_split_single_result, + do_split_global_result, pat_bin); + runres = do_binary_find(p, arg1, &ctx, pat_bin, NULL, &result); + if (runres == BF_OK && ctx->pat_term == THE_NON_VALUE) { + erts_bin_free(pat_bin); + } + switch (runres) { + case BF_OK: BIF_RET(result); - case DO_BIN_MATCH_RESTART: - BIF_TRAP3(&binary_find_trap_export, p, arg1, result, bin_term); + case BF_RESTART: + ASSERT(result == THE_NON_VALUE && ctx->trap_term != result && ctx->pat_term != result); + BIF_TRAP3(&binary_find_trap_export, p, arg1, ctx->trap_term, ctx->pat_term); default: goto badarg; } - badarg: +badarg: BIF_ERROR(p, BADARG); } @@ -1554,72 +1571,117 @@ BIF_RETTYPE binary_split_3(BIF_ALIST_3) return binary_split(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3); } -static Eterm do_match_not_found_result(Process *p, Eterm subject, BinaryFindState *bfs) +static Eterm do_match_not_found_result(Process *p, Eterm subject, BinaryFindContext **ctxp) { - if (bfs->flags & BINARY_FIND_ALL) { + if ((*ctxp)->flags & BF_FLAG_GLOBAL) { return NIL; } else { return am_nomatch; } } -static Eterm do_match_single_result(Process *p, Eterm subject, BinaryFindState *bfs, - Sint pos, Sint len) +static Eterm do_match_single_result(Process *p, Eterm subject, BinaryFindContext **ctxp) { + BinaryFindContext *ctx = (*ctxp); + BinaryFindFirstContext *ff = &(ctx->u.ff); Eterm erlen; Eterm *hp; Eterm ret; - erlen = erts_make_integer((Uint)(len), p); - ret = erts_make_integer(pos, p); + erlen = erts_make_integer((Uint)(ff->len), p); + ret = erts_make_integer(ff->pos, p); hp = HAlloc(p, 3); ret = TUPLE2(hp, ret, erlen); return ret; } -static Eterm do_match_global_result(Process *p, Eterm subject, BinaryFindState *bfs, - FindallData *fad, Uint fad_sz) +static Eterm do_match_global_result(Process *p, Eterm subject, BinaryFindContext **ctxp) { - Sint i; + BinaryFindContext *ctx = (*ctxp); + BinaryFindAllContext *fa = &(ctx->u.fa); + FindallData *fad; Eterm tpl; - Eterm *hp; - Eterm ret; + Sint i; + register Uint reds = ctx->reds; - for (i = 0; i < fad_sz; ++i) { - fad[i].epos = erts_make_integer(fad[i].pos, p); - fad[i].elen = erts_make_integer(fad[i].len, p); + if (ctx->state == BFSearch) { + if (ctx->pat_type == am_ac) { + fa->data = fa->d.ac.out; + fa->size = fa->d.ac.m; + } else { + fa->data = fa->d.bm.out; + fa->size = fa->d.bm.m; + } + fa->tail = fa->size - 1; + fa->head = 0; + fa->end_pos = 0; + fa->term = NIL; + if (ctx->exported == 0 && ((fa->size * 2) >= reds)) { + ctx = bf_context_export(p, ctx); + *ctxp = ctx; + fa = &(ctx->u.fa); + } + erts_factory_proc_prealloc_init(&(fa->factory), p, fa->size * (3 + 2)); + ctx->state = BFResult; + } + + fad = fa->data; + + if (fa->end_pos == 0) { + for (i = fa->head; i < fa->size; ++i) { + if (--reds == 0) { + ASSERT(ctx->exported == 1); + fa->head = i; + ctx->reds = reds; + return THE_NON_VALUE; + } + fad[i].epos = erts_make_integer(fad[i].pos, p); + fad[i].elen = erts_make_integer(fad[i].len, p); + } + fa->end_pos = 1; + fa->head = fa->tail; } - hp = HAlloc(p, fad_sz * (3 + 2)); - ret = NIL; - for (i = fad_sz - 1; i >= 0; --i) { - tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); - hp += 3; - ret = CONS(hp, tpl, ret); - hp += 2; + + for (i = fa->head; i >= 0; --i) { + if (--reds == 0) { + ASSERT(ctx->exported == 1); + fa->head = i; + ctx->reds = reds; + return THE_NON_VALUE; + } + tpl = TUPLE2(fa->factory.hp, fad[i].epos, fad[i].elen); + fa->factory.hp += 3; + fa->term = CONS(fa->factory.hp, tpl, fa->term); + fa->factory.hp += 2; } + ctx->reds = reds; + erts_factory_close(&(fa->factory)); - return ret; + return fa->term; } -static Eterm do_split_not_found_result(Process *p, Eterm subject, BinaryFindState *bfs) +static Eterm do_split_not_found_result(Process *p, Eterm subject, BinaryFindContext **ctxp) { + BinaryFindContext *ctx = (*ctxp); Eterm *hp; Eterm ret; - if (bfs->flags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL) + if (ctx->flags & (BF_FLAG_SPLIT_TRIM | BF_FLAG_SPLIT_TRIM_ALL) && binary_size(subject) == 0) { - return NIL; + return NIL; } hp = HAlloc(p, 2); ret = CONS(hp, subject, NIL); - return ret; } -static Eterm do_split_single_result(Process *p, Eterm subject, BinaryFindState *bfs, - Sint pos, Sint len) +static Eterm do_split_single_result(Process *p, Eterm subject, BinaryFindContext **ctxp) { + BinaryFindContext *ctx = (*ctxp); + BinaryFindFirstContext *ff = &(ctx->u.ff); + Sint pos; + Sint len; size_t orig_size; Eterm orig; Uint offset; @@ -1630,9 +1692,12 @@ static Eterm do_split_single_result(Process *p, Eterm subject, BinaryFindState * Eterm *hp; Eterm ret; + pos = ff->pos; + len = ff->len; + orig_size = binary_size(subject); - if ((bfs->flags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL)) && + if ((ctx->flags & (BF_FLAG_SPLIT_TRIM | BF_FLAG_SPLIT_TRIM_ALL)) && (orig_size - pos - len) == 0) { if (pos == 0) { ret = NIL; @@ -1653,7 +1718,7 @@ static Eterm do_split_single_result(Process *p, Eterm subject, BinaryFindState * hp += 2; } } else { - if ((bfs->flags & BINARY_SPLIT_TRIM_ALL) && (pos == 0)) { + if ((ctx->flags & BF_FLAG_SPLIT_TRIM_ALL) && (pos == 0)) { hp = HAlloc(p, 1 * (ERL_SUB_BIN_SIZE + 2)); ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); sb1 = NULL; @@ -1691,39 +1756,60 @@ static Eterm do_split_single_result(Process *p, Eterm subject, BinaryFindState * return ret; } -static Eterm do_split_global_result(Process *p, Eterm subject, BinaryFindState *bfs, - FindallData *fad, Uint fad_sz) +static Eterm do_split_global_result(Process *p, Eterm subject, BinaryFindContext **ctxp) { - size_t orig_size; + BinaryFindContext *ctx = (*ctxp); + BinaryFindAllContext *fa = &(ctx->u.fa); + FindallData *fad; Eterm orig; + size_t orig_size; Uint offset; Uint bit_offset; Uint bit_size; ErlSubBin *sb; + Uint do_trim; Sint i; - Sint tail; - Uint list_size; - Uint end_pos; - Uint do_trim = bfs->flags & (BINARY_SPLIT_TRIM | BINARY_SPLIT_TRIM_ALL); - Eterm *hp; - Eterm *hendp; - Eterm ret; + register Uint reds = ctx->reds; - tail = fad_sz - 1; - list_size = fad_sz + 1; - orig_size = binary_size(subject); - end_pos = (Uint)(orig_size); + if (ctx->state == BFSearch) { + if (ctx->pat_type == am_ac) { + fa->data = fa->d.ac.out; + fa->size = fa->d.ac.m; + } else { + fa->data = fa->d.bm.out; + fa->size = fa->d.bm.m; + } + fa->tail = fa->size - 1; + fa->head = fa->tail; + orig_size = binary_size(subject); + fa->end_pos = (Uint)(orig_size); + fa->term = NIL; + if (ctx->exported == 0 && ((fa->head + 1) >= reds)) { + ctx = bf_context_export(p, ctx); + *ctxp = ctx; + fa = &(ctx->u.fa); + } + erts_factory_proc_prealloc_init(&(fa->factory), p, (fa->size + 1) * (ERL_SUB_BIN_SIZE + 2)); + ctx->state = BFResult; + } - hp = HAlloc(p, list_size * (ERL_SUB_BIN_SIZE + 2)); - hendp = hp + list_size * (ERL_SUB_BIN_SIZE + 2); ERTS_GET_REAL_BIN(subject, orig, offset, bit_offset, bit_size); ASSERT(bit_size == 0); + fad = fa->data; + do_trim = ctx->flags & (BF_FLAG_SPLIT_TRIM | BF_FLAG_SPLIT_TRIM_ALL); - ret = NIL; - - for (i = tail; i >= 0; --i) { - sb = (ErlSubBin *)(hp); - sb->size = end_pos - (fad[i].pos + fad[i].len); + for (i = fa->head; i >= 0; --i) { + if (--reds == 0) { + ASSERT(ctx->exported == 1); + fa->head = i; + ctx->reds = reds; + if (!do_trim && (ctx->flags & BF_FLAG_SPLIT_TRIM)) { + ctx->flags &= ~BF_FLAG_SPLIT_TRIM; + } + return THE_NON_VALUE; + } + sb = (ErlSubBin *)(fa->factory.hp); + sb->size = fa->end_pos - (fad[i].pos + fad[i].len); if (!(sb->size == 0 && do_trim)) { sb->thing_word = HEADER_SUB_BIN; sb->offs = offset + fad[i].pos + fad[i].len; @@ -1731,15 +1817,18 @@ static Eterm do_split_global_result(Process *p, Eterm subject, BinaryFindState * sb->bitoffs = bit_offset; sb->bitsize = 0; sb->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - ret = CONS(hp, make_binary(sb), ret); - hp += 2; - do_trim &= ~BINARY_SPLIT_TRIM; + fa->factory.hp += ERL_SUB_BIN_SIZE; + fa->term = CONS(fa->factory.hp, make_binary(sb), fa->term); + fa->factory.hp += 2; + do_trim &= ~BF_FLAG_SPLIT_TRIM; } - end_pos = fad[i].pos; + fa->end_pos = fad[i].pos; } - sb = (ErlSubBin *)(hp); + fa->head = i; + ctx->reds = reds; + + sb = (ErlSubBin *)(fa->factory.hp); sb->size = fad[0].pos; if (!(sb->size == 0 && do_trim)) { sb->thing_word = HEADER_SUB_BIN; @@ -1748,26 +1837,31 @@ static Eterm do_split_global_result(Process *p, Eterm subject, BinaryFindState * sb->bitoffs = bit_offset; sb->bitsize = 0; sb->is_writable = 0; - hp += ERL_SUB_BIN_SIZE; - ret = CONS(hp, make_binary(sb), ret); - hp += 2; + fa->factory.hp += ERL_SUB_BIN_SIZE; + fa->term = CONS(fa->factory.hp, make_binary(sb), fa->term); + fa->factory.hp += 2; } - HRelease(p, hendp, hp); - return ret; + erts_factory_close(&(fa->factory)); + + return fa->term; } static BIF_RETTYPE binary_find_trap(BIF_ALIST_3) { int runres; Eterm result; - Binary *bin = erts_magic_ref2bin(BIF_ARG_3); - - runres = do_binary_find(BIF_P, BIF_ARG_1, NULL, bin, BIF_ARG_2, &result); - if (runres == DO_BIN_MATCH_OK) { + Binary *ctx_bin = erts_magic_ref2bin(BIF_ARG_2); + Binary *pat_bin = erts_magic_ref2bin(BIF_ARG_3); + BinaryFindContext *ctx = NULL; + + ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(ctx_bin) == bf_context_destructor); + runres = do_binary_find(BIF_P, BIF_ARG_1, &ctx, pat_bin, ctx_bin, &result); + if (runres == BF_OK) { + ASSERT(result != THE_NON_VALUE); BIF_RET(result); } else { - BUMP_ALL_REDS(BIF_P); - BIF_TRAP3(&binary_find_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3); + ASSERT(result == THE_NON_VALUE && ctx->trap_term != result && ctx->pat_term != result); + BIF_TRAP3(&binary_find_trap_export, BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3); } } |