aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPatrik Nyblom <[email protected]>2010-04-21 17:18:41 +0200
committerBjörn Gustavsson <[email protected]>2010-05-17 15:51:49 +0200
commit8e8e10d9d080655edba6dedbc13d9e729f209e2e (patch)
tree583358a3f97247a3641963199efcb159dc7ad9d4
parente27516edd537045e1151dc8a95c821ba18aadf4f (diff)
downloadotp-8e8e10d9d080655edba6dedbc13d9e729f209e2e.tar.gz
otp-8e8e10d9d080655edba6dedbc13d9e729f209e2e.tar.bz2
otp-8e8e10d9d080655edba6dedbc13d9e729f209e2e.zip
Move binary module bif's to erl_bif_binary.c
-rw-r--r--erts/emulator/Makefile.in3
-rw-r--r--erts/emulator/beam/binary.c1467
-rw-r--r--erts/emulator/beam/erl_bif_binary.c1548
3 files changed, 1550 insertions, 1468 deletions
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index 9f10a0ffaa..d767194d4d 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -735,7 +735,8 @@ RUN_OBJS = \
$(OBJDIR)/erl_drv_thread.o $(OBJDIR)/erl_bif_chksum.o \
$(OBJDIR)/erl_bif_re.o $(OBJDIR)/erl_unicode.o \
$(OBJDIR)/packet_parser.o $(OBJDIR)/safe_hash.o \
- $(OBJDIR)/erl_zlib.o $(OBJDIR)/erl_nif.o
+ $(OBJDIR)/erl_zlib.o $(OBJDIR)/erl_nif.o \
+ $(OBJDIR)/erl_bif_binary.o
ifeq ($(TARGET),win32)
DRV_OBJS = \
diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c
index 6b6571a1b2..7ca3eb686d 100644
--- a/erts/emulator/beam/binary.c
+++ b/erts/emulator/beam/binary.c
@@ -676,1470 +676,3 @@ bitstr_list_len(Eterm obj)
return (Sint) -1;
}
-/*
- * The native implementation functions for the module binary.
- * Searching is implemented using aither Boyer-More or Aho-Corasick
- * depending on number of searchstrings (BM if one, AC if more than one).
- * Native implementation is mostly for efficiency, nothing (except binary:referenced_byte_size)
- * really *needs* to be implemented in native code.
- */
-
-/* #define HARDDEBUG */
-
-/*
- * A micro allocator used when building search structures, just a convenience
- * for building structures inside a pre alocated magic binary using conventional
- * malloc-like interface.
- */
-static Export binary_match_trap_export;
-static BIF_RETTYPE binary_match_trap(BIF_ALIST_3);
-static Export binary_matches_trap_export;
-static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3);
-static Uint max_loop_limit;
-
-
-void erts_init_bif_binary(void)
-{
- sys_memset((void *) &binary_match_trap_export, 0, sizeof(Export));
- binary_match_trap_export.address = &binary_match_trap_export.code[3];
- binary_match_trap_export.code[0] = am_erlang;
- binary_match_trap_export.code[1] = am_binary_match_trap;
- binary_match_trap_export.code[2] = 3;
- binary_match_trap_export.code[3] = (BeamInstr) em_apply_bif;
- binary_match_trap_export.code[4] = (BeamInstr) &binary_match_trap;
-
- sys_memset((void *) &binary_matches_trap_export, 0, sizeof(Export));
- binary_matches_trap_export.address = &binary_matches_trap_export.code[3];
- binary_matches_trap_export.code[0] = am_erlang;
- binary_matches_trap_export.code[1] = am_binary_matches_trap;
- binary_matches_trap_export.code[2] = 3;
- binary_matches_trap_export.code[3] = (BeamInstr) em_apply_bif;
- binary_matches_trap_export.code[4] = (BeamInstr) &binary_matches_trap;
-
- max_loop_limit = 0;
- return;
-}
-
-Sint erts_binary_set_loop_limit(Sint limit)
-{
- Sint save = (Sint) max_loop_limit;
- if (limit <= 0) {
- max_loop_limit = 0;
- } else {
- max_loop_limit = (Uint) limit;
- }
- return save;
-}
-
-static Uint get_reds(Process *p, int loop_factor)
-{
- Uint reds = ERTS_BIF_REDS_LEFT(p) * loop_factor;
- Uint tmp = max_loop_limit;
- if (tmp != 0 && tmp < reds) {
- return tmp;
- }
- return reds;
-}
-
-#define MYALIGN(Size) (SIZEOF_VOID_P * (((Size) / SIZEOF_VOID_P) + \
- !!(((Size) % SIZEOF_VOID_P))))
-
-#ifdef DEBUG
-#define CHECK_ALLOCATOR(My) ASSERT((My).current <= ((My).mem + (My).size))
-#else
-#define CHECK_ALLOCATOR(My) /* nothing */
-#endif
-
-typedef struct _my_allocator {
- Uint size;
- byte *current;
- byte *mem;
-} MyAllocator;
-
-static void init_my_allocator(MyAllocator *my, Uint siz, byte *array)
-{
- ASSERT((siz % SIZEOF_VOID_P) == 0);
- my->size = siz;
- my->mem = array;
- my->current = my->mem;
-}
-
-static void *my_alloc(MyAllocator *my, Uint size)
-{
- void *ptr = my->current;
- my->current += MYALIGN(size);
- return ptr;
-}
-
-/*
- * The search functionality.
- *
- * The search is byte oriented, which works nicely for UTF-8 as well as latin1 data
- */
-
-#define ALPHABET_SIZE 256
-
-typedef struct _ac_node {
-#ifdef HARDDEBUG
- Uint32 id; /* To identify h pointer targets when dumping */
-#endif
- Uint32 d; /* Depth in trie, also represents the length
- (-1) of the matched string if in
- final set */
- Sint32 final; /* Members in final set represent matches.
- * The set representation is scattered
- * among the nodes in this way:
- * >0 -> this represents a member of
- * the final set, <0 -> member of
- * final set somewhere in the failure chain,
- * 0 -> not member of the final set */
- struct _ac_node *h; /* h(Hode) is the failure function */
- struct _ac_node *g[ALPHABET_SIZE]; /* g(Node,Character) is the
- transition function */
-} ACNode;
-
-typedef struct _ac_trie {
-#ifdef HARDDEBUG
- Uint32 idc;
-#endif
- Uint32 counter; /* Number of added patterns */
- ACNode *root; /* pointer to the root state */
-} ACTrie;
-
-typedef struct _bm_data {
- byte *x;
- Sint len;
- Sint *goodshift;
- Sint badshift[ALPHABET_SIZE];
-} BMData;
-
-#ifdef HARDDEBUG
-static void dump_bm_data(BMData *bm);
-static void dump_ac_trie(ACTrie *act);
-static void dump_ac_node(ACNode *node, int indent, int ch);
-#endif
-
-/*
- * The needed size of binary data for a search structure - given the accumulated
- * string lengths.
- */
-#define BM_SIZE(StrLen) /* StrLen: length of searchstring */ \
-((MYALIGN(sizeof(Sint) * (StrLen))) + /* goodshift array */ \
- MYALIGN(StrLen) + /* searchstring saved */ \
- (MYALIGN(sizeof(BMData)))) /* Structure */
-
-#define AC_SIZE(StrLens) /* StrLens: sum of all searchstring lengths */ \
-((MYALIGN(sizeof(ACNode)) * \
-((StrLens)+1)) + /* The actual nodes (including rootnode) */ \
- MYALIGN(sizeof(ACTrie))) /* Structure */
-
-
-#ifndef MAX
-#define MAX(A,B) (((A) > (B)) ? (A) : B)
-#endif
-
-/*
- * Callback for the magic binary
- */
-static void cleanup_my_data(Binary *bp)
-{
- return;
-}
-
-/*
- * Initiate a (allocated) micro allocator and fill in the base
- * for an Aho-Corasick search trie, given the accumulated length of the search strings.
- */
-static ACTrie *create_acdata(MyAllocator *my, Uint len,
- ACNode ***qbuff /* out */,Binary **the_bin /* out */)
-{
- Uint datasize = AC_SIZE(len);
- ACTrie *act;
- ACNode *acn;
- Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data);
- byte *data = ERTS_MAGIC_BIN_DATA(mb);
-
- init_my_allocator(my, datasize, data);
- act = my_alloc(my, sizeof(ACTrie)); /* Important that this is the first
- allocation */
- act->counter = 0;
- act->root = acn = my_alloc(my, sizeof(ACNode));
- acn->d = 0;
- acn->final = 0;
- acn->h = NULL;
- memset(acn->g, 0, sizeof(ACNode *) * ALPHABET_SIZE);
-#ifdef HARDDEBUG
- act->idc = 0;
- acn->id = 0;
-#endif
- *qbuff = erts_alloc(ERTS_ALC_T_TMP, sizeof(ACNode *) * len);
- *the_bin = mb;
- return act;
-}
-
-/*
- * The same initialization of allocator and basic data for Boyer-More.
- */
-static BMData *create_bmdata(MyAllocator *my, byte *x, Uint len, Binary **the_bin /* out */)
-{
- Uint datasize = BM_SIZE(len);
- BMData *bmd;
- Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data);
- byte *data = ERTS_MAGIC_BIN_DATA(mb);
- init_my_allocator(my, datasize, data);
- bmd = my_alloc(my, sizeof(BMData));
- bmd->x = my_alloc(my,len);
- memcpy(bmd->x,x,len);
- bmd->len = len;
- bmd->goodshift = my_alloc(my,sizeof(Uint) * len);
- *the_bin = mb;
- return bmd;
-}
-
-/*
- * Compilation of search structures
- */
-
-/*
- * Aho Corasick - Build a Trie and fill in the failure functions
- * when all strings are added.
- * The algorithm is nicely described by Dieter Bühler of University of Tübingen:
- * http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html
- */
-
-/*
- * Helper called ance for each search pattern
- */
-static void ac_add_one_pattern(MyAllocator *my, ACTrie *act, byte *x, Uint len)
-{
- ACNode *acn = act->root;
- Uint32 n = ++act->counter; /* Always increase conter, even if it's a duplicate
- as this shall identify the pattern in the
- final set and eventually be returned to
- the caller (in Erlang) */
- Uint i = 0;
-
- while(i < len) {
- if (acn->g[x[i]] != NULL) {
- /* node exists, continue */
- acn = acn->g[x[i]];
- ++i;
- } else {
- /* allocate a new node */
- ACNode *nn = my_alloc(my,sizeof(ACNode));
-#ifdef HARDDEBUG
- nn->id = ++(act->idc);
-#endif
- nn->d = i+1;
- nn->h = act->root;
- nn->final = 0;
- memset(nn->g, 0, sizeof(ACNode *) * ALPHABET_SIZE);
- acn->g[x[i]] = nn;
- ++i;
- acn = nn;
- }
- }
- if (acn->final == 0) { /* New pattern, add to final set */
- acn->final = n;
- }
-}
-
-/*
- * Called when all search patterns are added.
- */
-static void ac_compute_failure_functions(ACTrie *act, ACNode **qbuff)
-{
- ACNode *root = act->root;
- ACNode *parent;
- int i;
- int qh = 0,qt = 0;
- ACNode *child, *r;
-
- /* Set all children of the root to have the root as failure function */
- for (i = 0; i < ALPHABET_SIZE; ++i) {
- if (root->g[i] != NULL) {
- root->g[i]->h = root;
- /* Add to que for later traversal */
- qbuff[qt++] = root->g[i];
- }
- }
-
- /* So, now we've handled children of the root state, traverse the
- rest of the trie BF... */
- while (qh < qt) {
- parent = qbuff[qh++];
- for (i = 0; i < ALPHABET_SIZE; ++ i) {
- if ((child = parent->g[i]) != NULL) {
- /* Visit this node to */
- qbuff[qt++] = child;
- /* Search for correct failure function, follow the parents failure
- function until you find a similar transition funtion to this
- childs */
- r = parent->h;
- while (r != NULL && r->g[i] == NULL) {
- r = r->h;
- }
- if (r == NULL) {
- /* Replace NULL failures with the root as we go */
- child->h = (root->g[i] == NULL) ? root : root->g[i];
- } else {
- child->h = r->g[i];
- /*
- * The "final" set is scattered among the nodes. When
- * the failure function points to a member of the final set,
- * we have a match, but we might not see it in the current node
- * if we dont mark it as a special type of final, i.e. foolow
- * the failure function and you will find a real member of final
- * set. This is marked with a negative string id and only done if
- * this node does not represent a member in the final set.
- */
- if (!(child->final) && (child->h->final)) {
- child->final = -1;
- }
- }
- }
- }
- }
- /* Finally the failure function of the root should point to itself */
- root->h = root;
-}
-
-
-/*
- * The actual searching for needles in the haystack...
- * Find first match using Aho-Coracick Trie
- * return pattern number and fill in mpos + mlen if found, otherwise return 0
- * Return the matching pattern that *starts* first, and ends
- * last (difference when overlapping), hence the candidate thing.
- * Basic AC finds the first end before the first start...
- *
- */
-typedef struct {
- ACNode *q;
- Uint pos;
- Uint len;
- ACNode *candidate;
- Uint candidate_start;
-} ACFindFirstState;
-
-
-static void ac_init_find_first_match(ACFindFirstState *state, ACTrie *act, Sint startpos, Uint len)
-{
- state->q = act->root;
- state->pos = startpos;
- state->len = len;
- state->candidate = NULL;
- state->candidate_start = 0;
-}
-#define AC_OK 0
-#define AC_NOT_FOUND -1
-#define AC_RESTART -2
-
-#define AC_LOOP_FACTOR 10
-
-static int ac_find_first_match(ACFindFirstState *state, byte *haystack,
- Uint *mpos, Uint *mlen, Uint *reductions)
-{
- ACNode *q = state->q;
- Uint i = state->pos;
- ACNode *candidate = state->candidate, *r;
- Uint len = state->len;
- Uint candidate_start = state->candidate_start;
- Uint rstart;
- register Uint reds = *reductions;
-
- while (i < len) {
- if (--reds == 0) {
- state->q = q;
- state->pos = i;
- state->len = len;
- state->candidate = candidate;
- state->candidate_start = candidate_start;
- return AC_RESTART;
- }
-
- while (q->g[haystack[i]] == NULL && q->h != q) {
- q = q->h;
- }
- if (q->g[haystack[i]] != NULL) {
- q = q->g[haystack[i]];
- }
-#ifdef HARDDEBUG
- erts_printf("ch = %c, Current: %u\n", (int) haystack[i], (unsigned) q->id);
-#endif
- ++i;
- if (candidate != NULL && (i - q->d) > candidate_start) {
- break;
- }
- if (q->final) {
- r = q;
- while (r->final < 0)
- r = r->h;
- rstart = i - r->d;
- if (candidate == NULL || rstart < candidate_start ||
- (rstart == candidate_start && candidate->d < q->d)) {
- candidate_start = rstart;
- candidate = r;
- }
- }
- }
- *reductions = reds;
- if (!candidate) {
- return AC_NOT_FOUND;
- }
-#ifdef HARDDEBUG
- dump_ac_node(candidate,0,'?');
-#endif
- *mpos = candidate_start;
- *mlen = candidate->d;
- return AC_OK;
-}
-
-typedef struct _findall_data {
- Uint pos;
- Uint len;
-#ifdef HARDDEBUG
- Uint id;
-#endif
- Eterm epos;
- Eterm elen;
-} FindallData;
-
-typedef struct {
- ACNode *q;
- Uint pos;
- Uint len;
- Uint m;
- Uint allocated;
- FindallData *out;
-} ACFindAllState;
-
-static void ac_init_find_all(ACFindAllState *state, ACTrie *act, Sint startpos, Uint len)
-{
- state->q = act->root;
- state->pos = startpos;
- state->len = len;
- state->m = 0;
- state->allocated = 0;
- state->out = NULL;
-}
-
-static void ac_restore_find_all(ACFindAllState *state, char *buff)
-{
- memcpy(state,buff,sizeof(ACFindAllState));
- state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated));
- memcpy(state->out,buff+sizeof(ACFindAllState),sizeof(FindallData)*state->m);
-}
-
-static void ac_serialize_find_all(ACFindAllState *state, char *buff)
-{
- memcpy(buff,state,sizeof(ACFindAllState));
- memcpy(buff+sizeof(ACFindAllState),state->out,sizeof(FindallData)*state->m);
-}
-
-static void ac_clean_find_all(ACFindAllState *state)
-{
- if (state->out != NULL) {
- erts_free(ERTS_ALC_T_TMP, state->out);
- }
-#ifdef HARDDEBUG
- state->out = NULL;
- state->allocated = 0;
-#endif
-}
-
-#define SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(S) (sizeof(ACFindAllState)+(sizeof(FindallData)*(S).m))
-
-/*
- * Differs to the find_first function in that it stores all matches and the values
- * arte returned only in the state.
- */
-static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack,
- Uint *reductions)
-{
- ACNode *q = state->q;
- Uint i = state->pos;
- Uint rstart;
- ACNode *r;
- Uint len = state->len;
- Uint m = state->m, save_m;
- Uint allocated = state->allocated;
- FindallData *out = state->out;
- register Uint reds = *reductions;
-
-
- while (i < len) {
- if (--reds == 0) {
- state->q = q;
- state->pos = i;
- state->len = len;
- state->m = m;
- state->allocated = allocated;
- state->out = out;
- return AC_RESTART;
- }
- while (q->g[haystack[i]] == NULL && q->h != q) {
- q = q->h;
- }
- if (q->g[haystack[i]] != NULL) {
- q = q->g[haystack[i]];
- }
- ++i;
- if (q->final) {
- r = q;
- while (r->final) {
- while (r->final < 0)
- r = r->h;
-#ifdef HARDDEBUG
- erts_printf("Trying to add %u\n",(unsigned) r->final);
-#endif
- rstart = i - r->d;
- save_m = m;
- while (m > 0 && (out[m-1].pos > rstart ||
- (out[m-1].pos == rstart &&
- out[m-1].len < r->d))) {
-#ifdef HARDDEBUG
- erts_printf("Popping %u\n",(unsigned) out[m-1].id);
-#endif
- --m;
- }
-#ifdef HARDDEBUG
- if (m > 0) {
- erts_printf("Pos %u\n",out[m-1].pos);
- erts_printf("Len %u\n",out[m-1].len);
- }
- erts_printf("Rstart %u\n",rstart);
-#endif
- if (m == 0 || out[m-1].pos + out[m-1].len <= rstart) {
- if (m >= allocated) {
- if (!allocated) {
- allocated = 10;
- out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * allocated);
- } else {
- allocated *= 2;
- out = erts_realloc(ERTS_ALC_T_TMP, out,
- sizeof(FindallData) * allocated);
- }
- }
- out[m].pos = rstart;
- out[m].len = r->d;
-#ifdef HARDDEBUG
- out[m].id = r->final;
-#endif
- ++m;
-#ifdef HARDDEBUG
- erts_printf("Pushing %u\n",(unsigned) out[m-1].id);
-#endif
- } else {
-#ifdef HARDDEBUG
- erts_printf("Backtracking %d steps\n",save_m - m);
-#endif
- m = save_m;
- }
- r = r->h;
- }
- }
- }
- *reductions = reds;
- state->m = m;
- state->out = out;
- return (m == 0) ? AC_NOT_FOUND : AC_OK;
-}
-
-/*
- * Boyer More - most obviously implemented more or less exactly as Christian Charras
- * and Thierry Lecroq describes it in "Handbook of Exact String-Matching Algorithms"
- * http://www-igm.univ-mlv.fr/~lecroq/string/
- */
-
-/*
- * Call this to compute badshifts array
- */
-static void compute_badshifts(BMData *bmd)
-{
- Sint i;
- Sint m = bmd->len;
-
- for (i = 0; i < ALPHABET_SIZE; ++i) {
- bmd->badshift[i] = m;
- }
- for (i = 0; i < m - 1; ++i) {
- bmd->badshift[bmd->x[i]] = m - i - 1;
- }
-}
-
-/* Helper for "compute_goodshifts" */
-static void compute_suffixes(byte *x, Sint m, Sint *suffixes)
-{
- int f,g,i;
-
- suffixes[m - 1] = m;
-
- f = 0; /* To avoid use before set warning */
-
- g = m - 1;
-
- for (i = m - 2; i >= 0; --i) {
- if (i > g && suffixes[i + m - f] < i - g) {
- suffixes[i] = suffixes[i + m - 1 - f];
- } else {
- if (i < g) {
- g = i;
- }
- f = i;
- while ( g >= 0 && x[g] == x[g + m - 1 - f] ) {
- --g;
- }
- suffixes[i] = f - g;
- }
- }
-}
-
-/*
- * Call this to compute goodshift array
- */
-static void compute_goodshifts(BMData *bmd)
-{
- Sint m = bmd->len;
- byte *x = bmd->x;
- Sint i, j;
- Sint *suffixes = erts_alloc(ERTS_ALC_T_TMP, m * sizeof(Uint));
-
- compute_suffixes(x, m, suffixes);
-
- for (i = 0; i < m; ++i) {
- bmd->goodshift[i] = m;
- }
-
- j = 0;
-
- for (i = m - 1; i >= -1; --i) {
- if (i == -1 || suffixes[i] == i + 1) {
- while (j < m - 1 - i) {
- if (bmd->goodshift[j] == m) {
- bmd->goodshift[j] = m - 1 - i;
- }
- ++j;
- }
- }
- }
- for (i = 0; i <= m - 2; ++i) {
- bmd->goodshift[m - 1 - suffixes[i]] = m - 1 - i;
- }
- erts_free(ERTS_ALC_T_TMP, suffixes);
-}
-
-typedef struct {
- Sint pos;
- Sint len;
-} BMFindFirstState;
-
-#define BM_OK 0 /* used only for find_all */
-#define BM_NOT_FOUND -1
-#define BM_RESTART -2
-#define BM_LOOP_FACTOR 10 /* Should we have a higher value? */
-
-static void bm_init_find_first_match(BMFindFirstState *state, Sint startpos, Uint len)
-{
- state->pos = startpos;
- state->len = (Sint) len;
-}
-
-
-static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *haystack, Uint *reductions)
-{
- Sint blen = bmd->len;
- Sint len = state->len;
- Sint *gs = bmd->goodshift;
- Sint *bs = bmd->badshift;
- byte *needle = bmd->x;
- Sint i;
- Sint j = state->pos;
- register Uint reds = *reductions;
-
- while (j <= len - blen) {
- if (--reds == 0) {
- state->pos = j;
- return BM_RESTART;
- }
- for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i)
- ;
- if (i < 0) { /* found */
- *reductions = reds;
- return j;
- }
- j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i);
- }
- *reductions = reds;
- return BM_NOT_FOUND;
-}
-
-typedef struct {
- Sint pos;
- Sint len;
- Uint m;
- Uint allocated;
- FindallData *out;
-} BMFindAllState;
-
-static void bm_init_find_all(BMFindAllState *state, Sint startpos, Uint len)
-{
- state->pos = startpos;
- state->len = (Sint) len;
- state->m = 0;
- state->allocated = 0;
- state->out = NULL;
-}
-
-static void bm_restore_find_all(BMFindAllState *state, char *buff)
-{
- memcpy(state,buff,sizeof(BMFindAllState));
- state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated));
- memcpy(state->out,buff+sizeof(BMFindAllState),sizeof(FindallData)*state->m);
-}
-
-static void bm_serialize_find_all(BMFindAllState *state, char *buff)
-{
- memcpy(buff,state,sizeof(BMFindAllState));
- memcpy(buff+sizeof(BMFindAllState),state->out,sizeof(FindallData)*state->m);
-}
-
-static void bm_clean_find_all(BMFindAllState *state)
-{
- if (state->out != NULL) {
- erts_free(ERTS_ALC_T_TMP, state->out);
- }
-#ifdef HARDDEBUG
- state->out = NULL;
- state->allocated = 0;
-#endif
-}
-
-#define SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(S) (sizeof(BMFindAllState)+(sizeof(FindallData)*(S).m))
-
-/*
- * Differs to the find_first function in that it stores all matches and the values
- * arte returned only in the state.
- */
-static Sint bm_find_all_non_overlapping(BMFindAllState *state,
- BMData *bmd, byte *haystack, Uint *reductions)
-{
- Sint blen = bmd->len;
- Sint len = state->len;
- Sint *gs = bmd->goodshift;
- Sint *bs = bmd->badshift;
- byte *needle = bmd->x;
- Sint i;
- Sint j = state->pos;
- Uint m = state->m;
- Uint allocated = state->allocated;
- FindallData *out = state->out;
- register Uint reds = *reductions;
-
- while (j <= len - blen) {
- if (--reds == 0) {
- state->pos = j;
- state->m = m;
- state->allocated = allocated;
- state->out = out;
- return BM_RESTART;
- }
- for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i)
- ;
- if (i < 0) { /* found */
- if (m >= allocated) {
- if (!allocated) {
- allocated = 10;
- out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * allocated);
- } else {
- allocated *= 2;
- out = erts_realloc(ERTS_ALC_T_TMP, out,
- sizeof(FindallData) * allocated);
- }
- }
- out[m].pos = j;
- out[m].len = blen;
- ++m;
- j += blen;
- } else {
- j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i);
- }
- }
- state->m = m;
- state->out = out;
- *reductions = reds;
- return (m == 0) ? BM_NOT_FOUND : BM_OK;
-}
-
-/*
- * Interface functions (i.e. "bif's")
- */
-
-/*
- * Search functionality interfaces
- */
-
-static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp)
-{
- Eterm t, b, comp_term = NIL;
- Uint characters;
- Uint words;
-
- characters = 0;
- words = 0;
-
- if (is_list(argument)) {
- t = argument;
- while (is_list(t)) {
- b = CAR(list_val(t));
- t = CDR(list_val(t));
- if (!is_binary(b)) {
- goto badarg;
- }
- if (binary_bitsize(b) != 0) {
- goto badarg;
- }
- ++words;
- characters += binary_size(b);
- }
- if (is_not_nil(t)) {
- goto badarg;
- }
- if (words > 1) {
- comp_term = argument;
- } else {
- comp_term = CAR(list_val(argument));
- }
- } else if (is_binary(argument)) {
- if (binary_bitsize(argument) != 0) {
- goto badarg;
- }
- words = 1;
- comp_term = argument;
- characters = binary_size(argument);
- }
-
- if (characters == 0) {
- goto badarg;
- }
- ASSERT(words > 0);
-
- if (words == 1) {
- byte *bytes;
- Uint bitoffs, bitsize;
- byte *temp_alloc = NULL;
- MyAllocator my;
- BMData *bmd;
- Binary *bin;
-
- ERTS_GET_BINARY_BYTES(comp_term, bytes, bitoffs, bitsize);
- if (bitoffs != 0) {
- bytes = erts_get_aligned_binary_bytes(comp_term, &temp_alloc);
- }
- bmd = create_bmdata(&my, bytes, characters, &bin);
- compute_badshifts(bmd);
- compute_goodshifts(bmd);
- erts_free_aligned_binary_bytes(temp_alloc);
- CHECK_ALLOCATOR(my);
- *tag = am_bm;
- *binp = bin;
- return 0;
- } else {
- ACTrie *act;
- MyAllocator my;
- ACNode **qbuff;
- Binary *bin;
-
- act = create_acdata(&my, characters, &qbuff, &bin);
- t = comp_term;
- while (is_list(t)) {
- byte *bytes;
- Uint bitoffs, bitsize;
- byte *temp_alloc = NULL;
- b = CAR(list_val(t));
- t = CDR(list_val(t));
- ERTS_GET_BINARY_BYTES(b, bytes, bitoffs, bitsize);
- if (bitoffs != 0) {
- bytes = erts_get_aligned_binary_bytes(b, &temp_alloc);
- }
- ac_add_one_pattern(&my,act,bytes,binary_size(b));
- erts_free_aligned_binary_bytes(temp_alloc);
- }
- ac_compute_failure_functions(act,qbuff);
- CHECK_ALLOCATOR(my);
- erts_free(ERTS_ALC_T_TMP,qbuff);
- *tag = am_ac;
- *binp = bin;
- return 0;
- }
- badarg:
- return -1;
-}
-
-BIF_RETTYPE binary_compile_pattern_1(BIF_ALIST_1)
-{
- Binary *bin;
- Eterm tag, ret;
- Eterm *hp;
-
- if (do_binary_match_compile(BIF_ARG_1,&tag,&bin)) {
- BIF_ERROR(BIF_P,BADARG);
- }
- hp = HAlloc(BIF_P, PROC_BIN_SIZE+3);
- ret = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin);
- ret = TUPLE2(hp, tag, ret);
- BIF_RET(ret);
-}
-
-#define DO_BIN_MATCH_OK 0
-#define DO_BIN_MATCH_BADARG -1
-#define DO_BIN_MATCH_RESTART -2
-
-static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen,
- Eterm type, Binary *bin, Eterm state_term, Eterm *res_term)
-{
- byte *bytes;
- Uint bitoffs, bitsize;
- byte *temp_alloc = NULL;
-
- ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize);
- if (bitsize != 0) {
- goto badarg;
- }
- if (bitoffs != 0) {
- bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc);
- }
- if (state_term != NIL) {
- Eterm *ptr = big_val(state_term);
- type = ptr[1];
- }
-
- if (type == am_bm) {
- BMData *bm;
- Sint pos;
- Eterm ret;
- Eterm *hp;
- BMFindFirstState state;
- Uint reds = get_reds(p, BM_LOOP_FACTOR);
- Uint save_reds = reds;
-
- bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin);
-#ifdef HARDDEBUG
- dump_bm_data(bm);
-#endif
- if (state_term == NIL) {
- bm_init_find_first_match(&state, hsstart, hslen);
- } else {
- Eterm *ptr = big_val(state_term);
- memcpy(&state,ptr+2,sizeof(state));
- }
-#ifdef HARDDEBUG
- erts_printf("(bm) state->pos = %ld, state->len = %lu\n",state.pos, state.len);
-#endif
- pos = bm_find_first_match(&state, bm, bytes, &reds);
- if (pos == BM_NOT_FOUND) {
- ret = am_nomatch;
- } else if (pos == BM_RESTART) {
- int x = (sizeof(BMFindFirstState) / sizeof(Eterm)) +
- !!(sizeof(BMFindFirstState) % sizeof(Eterm));
-#ifdef HARDDEBUG
- erts_printf("Trap bm!\n");
-#endif
- hp = HAlloc(p,x+2);
- hp[0] = make_pos_bignum_header(x+1);
- hp[1] = type;
- memcpy(hp+2,&state,sizeof(state));
- *res_term = make_big(hp);
- erts_free_aligned_binary_bytes(temp_alloc);
- return DO_BIN_MATCH_RESTART;
- } else {
- Eterm erlen = erts_make_integer((Uint) bm->len, p);
- ret = erts_make_integer(pos,p);
- hp = HAlloc(p,3);
- ret = TUPLE2(hp, ret, erlen);
- }
- erts_free_aligned_binary_bytes(temp_alloc);
- BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR);
- *res_term = ret;
- return DO_BIN_MATCH_OK;
- } else if (type == am_ac) {
- ACTrie *act;
- Uint pos, rlen;
- int acr;
- ACFindFirstState state;
- Eterm ret;
- Eterm *hp;
- Uint reds = get_reds(p, AC_LOOP_FACTOR);
- Uint save_reds = reds;
-
- act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin);
-#ifdef HARDDEBUG
- dump_ac_trie(act);
-#endif
- if (state_term == NIL) {
- ac_init_find_first_match(&state, act, hsstart, hslen);
- } else {
- Eterm *ptr = big_val(state_term);
- memcpy(&state,ptr+2,sizeof(state));
- }
- acr = ac_find_first_match(&state, bytes, &pos, &rlen, &reds);
- if (acr == AC_NOT_FOUND) {
- ret = am_nomatch;
- } else if (acr == AC_RESTART) {
- int x = (sizeof(state) / sizeof(Eterm)) +
- !!(sizeof(BMFindFirstState) % sizeof(Eterm));
-#ifdef HARDDEBUG
- erts_printf("Trap ac!\n");
-#endif
- hp = HAlloc(p,x+2);
- hp[0] = make_pos_bignum_header(x+1);
- hp[1] = type;
- memcpy(hp+2,&state,sizeof(state));
- *res_term = make_big(hp);
- erts_free_aligned_binary_bytes(temp_alloc);
- return DO_BIN_MATCH_RESTART;
- } else {
- Eterm epos = erts_make_integer(pos+hsstart,p);
- Eterm erlen = erts_make_integer(rlen,p);
- hp = HAlloc(p,3);
- ret = TUPLE2(hp, epos, erlen);
- }
- erts_free_aligned_binary_bytes(temp_alloc);
- BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR);
- *res_term = ret;
- return DO_BIN_MATCH_OK;
- }
- badarg:
- return DO_BIN_MATCH_BADARG;
-}
-
-static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen,
- Eterm type, Binary *bin, Eterm state_term, Eterm *res_term)
-{
- byte *bytes;
- Uint bitoffs, bitsize;
- byte *temp_alloc = NULL;
-
- ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize);
- if (bitsize != 0) {
- goto badarg;
- }
- if (bitoffs != 0) {
- bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc);
- }
- if (state_term != NIL) {
- Eterm *ptr = big_val(state_term);
- type = ptr[1];
- }
-
- if (type == am_bm) {
- BMData *bm;
- Sint pos;
- Eterm ret,tpl;
- Eterm *hp;
- BMFindAllState state;
- Uint reds = get_reds(p, BM_LOOP_FACTOR);
- Uint save_reds = reds;
-
- bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin);
-#ifdef HARDDEBUG
- dump_bm_data(bm);
-#endif
- if (state_term == NIL) {
- bm_init_find_all(&state, hsstart, hslen);
- } else {
- Eterm *ptr = big_val(state_term);
- bm_restore_find_all(&state,(char *) (ptr+2));
- }
-
- pos = bm_find_all_non_overlapping(&state, bm, bytes, &reds);
- if (pos == BM_NOT_FOUND) {
- ret = NIL;
- } else if (pos == BM_RESTART) {
- int x = (SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) +
- !!(SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm));
-#ifdef HARDDEBUG
- erts_printf("Trap bm!\n");
-#endif
- hp = HAlloc(p,x+2);
- hp[0] = make_pos_bignum_header(x+1);
- hp[1] = type;
- bm_serialize_find_all(&state, (char *) (hp+2));
- *res_term = make_big(hp);
- erts_free_aligned_binary_bytes(temp_alloc);
- bm_clean_find_all(&state);
- return DO_BIN_MATCH_RESTART;
- } else {
- FindallData *fad = state.out;
- int i;
- for (i = 0; i < state.m; ++i) {
- fad[i].epos = erts_make_integer(fad[i].pos,p);
- fad[i].elen = erts_make_integer(fad[i].len,p);
- }
- hp = HAlloc(p,state.m * (3 + 2));
- ret = NIL;
- for (i = state.m - 1; i >= 0; --i) {
- tpl = TUPLE2(hp, fad[i].epos, fad[i].elen);
- hp +=3;
- ret = CONS(hp,tpl,ret);
- hp += 2;
- }
- }
- erts_free_aligned_binary_bytes(temp_alloc);
- bm_clean_find_all(&state);
- BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR);
- *res_term = ret;
- return DO_BIN_MATCH_OK;
- } else if (type == am_ac) {
- ACTrie *act;
- int acr;
- ACFindAllState state;
- Eterm ret,tpl;
- Eterm *hp;
- Uint reds = get_reds(p, AC_LOOP_FACTOR);
- Uint save_reds = reds;
-
- act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin);
-#ifdef HARDDEBUG
- dump_ac_trie(act);
-#endif
- if (state_term == NIL) {
- ac_init_find_all(&state, act, hsstart, hslen);
- } else {
- Eterm *ptr = big_val(state_term);
- ac_restore_find_all(&state,(char *) (ptr+2));
- }
- acr = ac_find_all_non_overlapping(&state, bytes, &reds);
- if (acr == AC_NOT_FOUND) {
- ret = NIL;
- } else if (acr == AC_RESTART) {
- int x = (SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) +
- !!(SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm));
-#ifdef HARDDEBUG
- erts_printf("Trap ac!\n");
-#endif
- hp = HAlloc(p,x+2);
- hp[0] = make_pos_bignum_header(x+1);
- hp[1] = type;
- ac_serialize_find_all(&state, (char *) (hp+2));
- *res_term = make_big(hp);
- erts_free_aligned_binary_bytes(temp_alloc);
- ac_clean_find_all(&state);
- return DO_BIN_MATCH_RESTART;
- } else {
- FindallData *fad = state.out;
- int i;
- for (i = 0; i < state.m; ++i) {
- fad[i].epos = erts_make_integer(fad[i].pos,p);
- fad[i].elen = erts_make_integer(fad[i].len,p);
- }
- hp = HAlloc(p,state.m * (3 + 2));
- ret = NIL;
- for (i = state.m - 1; i >= 0; --i) {
- tpl = TUPLE2(hp, fad[i].epos, fad[i].elen);
- hp +=3;
- ret = CONS(hp,tpl,ret);
- hp += 2;
- }
- }
- erts_free_aligned_binary_bytes(temp_alloc);
- ac_clean_find_all(&state);
- BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR);
- *res_term = ret;
- return DO_BIN_MATCH_OK;
- }
- badarg:
- return DO_BIN_MATCH_BADARG;
-}
-
-static BIF_RETTYPE binary_match_trap(BIF_ALIST_3)
-{
- int runres;
- Eterm result;
- Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val;
- runres = do_binary_match(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result);
- switch (runres) {
- case DO_BIN_MATCH_OK:
- BIF_RET(result);
- case DO_BIN_MATCH_RESTART:
- BUMP_ALL_REDS(BIF_P);
- BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3);
- default:
- goto badarg;
- }
- badarg:
- BIF_ERROR(BIF_P,BADARG);
-}
-
-static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3)
-{
- int runres;
- Eterm result;
- Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val;
- runres = do_binary_matches(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result);
- switch (runres) {
- case DO_BIN_MATCH_OK:
- BIF_RET(result);
- case DO_BIN_MATCH_RESTART:
- BUMP_ALL_REDS(BIF_P);
- BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3);
- default:
- goto badarg;
- }
- badarg:
- BIF_ERROR(BIF_P,BADARG);
-}
-
-
-BIF_RETTYPE binary_match_3(BIF_ALIST_3)
-{
- Uint hsstart, hslen;
- Eterm *tp;
- Eterm type;
- Binary *bin;
- Eterm bin_term = NIL;
- int runres;
- Eterm result;
-
- if (is_not_binary(BIF_ARG_1)) {
- goto badarg;
- }
- if (BIF_ARG_3 == ((Eterm) 0)) {
- /* Invalid term, we're called from binary_match_2... */
- hsstart = 0;
- hslen = binary_size(BIF_ARG_1);
- } else if (is_list(BIF_ARG_3)) {
- Eterm l = BIF_ARG_3;
- while(is_list(l)) {
- Eterm t = CAR(list_val(l));
- if (!is_tuple(t)) {
- goto badarg;
- }
- tp = tuple_val(t);
- if (arityval(*tp) != 2) {
- goto badarg;
- }
- if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) {
- goto badarg;
- }
- if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) {
- goto badarg;
- }
- if (hslen < hsstart) {
- goto badarg;
- }
- if (hslen > binary_size(BIF_ARG_1)-1) {
- goto badarg; /* XXX:PaN or should we take as much as we have ? */
- }
- hslen = hslen + 1 - hsstart;
- l = CDR(list_val(l));
- }
- } else if (BIF_ARG_3 != NIL) {
- goto badarg;
- }
- if (hslen == 0) {
- BIF_RET(am_nomatch);
- }
- if (is_tuple(BIF_ARG_2)) {
- tp = tuple_val(BIF_ARG_2);
- if (arityval(*tp) != 2 || is_not_atom(tp[1])) {
- goto badarg;
- }
- if (((tp[1] != am_bm) && (tp[1] != am_ac)) ||
- !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) {
- goto badarg;
- }
- type = tp[1];
- bin = ((ProcBin *) binary_val(tp[2]))->val;
- if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) {
- goto badarg;
- }
- bin_term = tp[2];
- } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) {
- goto badarg;
- }
- runres = do_binary_match(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result);
- if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) {
- Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE);
- bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin);
- } else if (bin_term == NIL) {
- erts_bin_free(bin);
- }
- switch (runres) {
- case DO_BIN_MATCH_OK:
- BIF_RET(result);
- case DO_BIN_MATCH_RESTART:
- BUMP_ALL_REDS(BIF_P);
- BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, bin_term);
- default:
- goto badarg;
- }
- badarg:
- BIF_ERROR(BIF_P,BADARG);
-}
-
-BIF_RETTYPE binary_matches_3(BIF_ALIST_3)
-{
- Uint hsstart, hslen;
- Eterm *tp;
- Eterm type;
- Binary *bin;
- Eterm bin_term = NIL;
- int runres;
- Eterm result;
-
- if (is_not_binary(BIF_ARG_1)) {
- goto badarg;
- }
- if (BIF_ARG_3 == ((Eterm) 0)) {
- /* Invalid term, we're called from binary_matches_2... */
- hsstart = 0;
- hslen = binary_size(BIF_ARG_1);
- } else if (is_list(BIF_ARG_3)) {
- Eterm l = BIF_ARG_3;
- while(is_list(l)) {
- Eterm t = CAR(list_val(l));
- if (!is_tuple(t)) {
- goto badarg;
- }
- tp = tuple_val(t);
- if (arityval(*tp) != 2) {
- goto badarg;
- }
- if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) {
- goto badarg;
- }
- if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) {
- goto badarg;
- }
- if (hslen < hsstart) {
- goto badarg;
- }
- if (hslen > binary_size(BIF_ARG_1)-1) {
- goto badarg; /* XXX:PaN or should we take as much as we have ? */
- }
- hslen = hslen + 1 - hsstart;
- l = CDR(list_val(l));
- }
- } else if (BIF_ARG_3 != NIL) {
- goto badarg;
- }
- if (hslen == 0) {
- BIF_RET(am_nomatch);
- }
- if (is_tuple(BIF_ARG_2)) {
- tp = tuple_val(BIF_ARG_2);
- if (arityval(*tp) != 2 || is_not_atom(tp[1])) {
- goto badarg;
- }
- if (((tp[1] != am_bm) && (tp[1] != am_ac)) ||
- !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) {
- goto badarg;
- }
- type = tp[1];
- bin = ((ProcBin *) binary_val(tp[2]))->val;
- if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) {
- goto badarg;
- }
- bin_term = tp[2];
- } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) {
- goto badarg;
- }
- runres = do_binary_matches(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result);
- if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) {
- Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE);
- bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin);
- } else if (bin_term == NIL) {
- erts_bin_free(bin);
- }
- switch (runres) {
- case DO_BIN_MATCH_OK:
- BIF_RET(result);
- case DO_BIN_MATCH_RESTART:
- BUMP_ALL_REDS(BIF_P);
- BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, bin_term);
- default:
- goto badarg;
- }
- badarg:
- BIF_ERROR(BIF_P,BADARG);
-}
-
-
-BIF_RETTYPE binary_match_2(BIF_ALIST_2)
-{
- return binary_match_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0));
-}
-
-
-BIF_RETTYPE binary_matches_2(BIF_ALIST_2)
-{
- return binary_matches_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0));
-}
-
-/*
- * Hard debug functions (dump) for the search structures
- */
-
-#ifdef HARDDEBUG
-static void dump_bm_data(BMData *bm)
-{
- int i,j;
- erts_printf("Dumping Boyer-More structure.\n");
- erts_printf("=============================\n");
- erts_printf("Searchstring [%ld]:\n", bm->len);
- erts_printf("<<");
- for (i = 0; i < bm->len; ++i) {
- if (i > 0) {
- erts_printf(", ");
- }
- erts_printf("%d", (int) bm->x[i]);
- if (bm->x[i] >= 'A') {
- erts_printf(" ($%c)",(char) bm->x[i]);
- }
- }
- erts_printf(">>\n");
- erts_printf("GoodShift array:\n");
- for (i = 0; i < bm->len; ++i) {
- erts_printf("GoodShift[%d]: %ld\n", i, bm->goodshift[i]);
- }
- erts_printf("BadShift array:\n");
- j = 0;
- for (i = 0; i < ALPHABET_SIZE; i += j) {
- for (j = 0; i + j < ALPHABET_SIZE && j < 6; ++j) {
- erts_printf("BS[%03d]:%02ld, ", i+j, bm->badshift[i+j]);
- }
- erts_printf("\n");
- }
-}
-
-static void dump_ac_node(ACNode *node, int indent, int ch) {
- int i;
- char *spaces = erts_alloc(ERTS_ALC_T_TMP, 10 * indent + 1);
- memset(spaces,' ',10*indent);
- spaces[10*indent] = '\0';
- erts_printf("%s-> %c\n",spaces,ch);
- erts_printf("%sId: %u\n",spaces,(unsigned) node->id);
- erts_printf("%sD: %u\n",spaces,(unsigned)node->d);
- erts_printf("%sFinal: %d\n",spaces,(int)node->final);
- erts_printf("%sFail: %u\n",spaces,(unsigned)node->h->id);
- erts_free(ERTS_ALC_T_TMP,spaces);
- for(i=0;i<ALPHABET_SIZE;++i) {
- if (node->g[i] != NULL && node->g[i] != node) {
- dump_ac_node(node->g[i],indent+1,i);
- }
- }
-}
-
-
-static void dump_ac_trie(ACTrie *act)
-{
- erts_printf("Aho Corasick Trie dump.\n");
- erts_printf("=======================\n");
- erts_printf("Node counter: %u\n", (unsigned) act->idc);
- erts_printf("Searchstring counter: %u\n", (unsigned) act->counter);
- erts_printf("Trie:\n");
- dump_ac_node(act->root, 0, '0');
- return;
-}
-#endif
diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c
new file mode 100644
index 0000000000..63c82443c5
--- /dev/null
+++ b/erts/emulator/beam/erl_bif_binary.c
@@ -0,0 +1,1548 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 1996-2010. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * NOTE: This file contains the BIF's for the *module* binary in stdlib.
+ * other BIF's concerning binaries are in binary.c.
+ */
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "sys.h"
+#include "erl_vm.h"
+#include "global.h"
+#include "erl_process.h"
+#include "error.h"
+#include "bif.h"
+#include "big.h"
+#include "erl_binary.h"
+#include "erl_bits.h"
+
+
+/*
+ * The native implementation functions for the module binary.
+ * Searching is implemented using aither Boyer-More or Aho-Corasick
+ * depending on number of searchstrings (BM if one, AC if more than one).
+ * Native implementation is mostly for efficiency, nothing
+ * (except binary:referenced_byte_size) really *needs* to be implemented
+ * in native code.
+ */
+
+/* #define HARDDEBUG */
+
+/* Init and local variables */
+
+static Export binary_match_trap_export;
+static BIF_RETTYPE binary_match_trap(BIF_ALIST_3);
+static Export binary_matches_trap_export;
+static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3);
+static Uint max_loop_limit;
+
+
+void erts_init_bif_binary(void)
+{
+ sys_memset((void *) &binary_match_trap_export, 0, sizeof(Export));
+ binary_match_trap_export.address = &binary_match_trap_export.code[3];
+ binary_match_trap_export.code[0] = am_erlang;
+ binary_match_trap_export.code[1] = am_binary_match_trap;
+ binary_match_trap_export.code[2] = 3;
+ binary_match_trap_export.code[3] = (BeamInstr) em_apply_bif;
+ binary_match_trap_export.code[4] = (BeamInstr) &binary_match_trap;
+
+ sys_memset((void *) &binary_matches_trap_export, 0, sizeof(Export));
+ binary_matches_trap_export.address = &binary_matches_trap_export.code[3];
+ binary_matches_trap_export.code[0] = am_erlang;
+ binary_matches_trap_export.code[1] = am_binary_matches_trap;
+ binary_matches_trap_export.code[2] = 3;
+ binary_matches_trap_export.code[3] = (BeamInstr) em_apply_bif;
+ binary_matches_trap_export.code[4] = (BeamInstr) &binary_matches_trap;
+
+ max_loop_limit = 0;
+ return;
+}
+
+/*
+ * Setting the loop_limit for searches for debugging
+ */
+Sint erts_binary_set_loop_limit(Sint limit)
+{
+ Sint save = (Sint) max_loop_limit;
+ if (limit <= 0) {
+ max_loop_limit = 0;
+ } else {
+ max_loop_limit = (Uint) limit;
+ }
+ return save;
+}
+
+static Uint get_reds(Process *p, int loop_factor)
+{
+ Uint reds = ERTS_BIF_REDS_LEFT(p) * loop_factor;
+ Uint tmp = max_loop_limit;
+ if (tmp != 0 && tmp < reds) {
+ return tmp;
+ }
+ return reds;
+}
+
+/*
+ * A micro allocator used when building search structures, just a convenience
+ * for building structures inside a pre alocated magic binary using
+ * conventional malloc-like interface.
+ */
+
+#define MYALIGN(Size) (SIZEOF_VOID_P * (((Size) / SIZEOF_VOID_P) + \
+ !!(((Size) % SIZEOF_VOID_P))))
+
+#ifdef DEBUG
+#define CHECK_ALLOCATOR(My) ASSERT((My).current <= ((My).mem + (My).size))
+#else
+#define CHECK_ALLOCATOR(My) /* nothing */
+#endif
+
+typedef struct _my_allocator {
+ Uint size;
+ byte *current;
+ byte *mem;
+} MyAllocator;
+
+static void init_my_allocator(MyAllocator *my, Uint siz, byte *array)
+{
+ ASSERT((siz % SIZEOF_VOID_P) == 0);
+ my->size = siz;
+ my->mem = array;
+ my->current = my->mem;
+}
+
+static void *my_alloc(MyAllocator *my, Uint size)
+{
+ void *ptr = my->current;
+ my->current += MYALIGN(size);
+ return ptr;
+}
+
+/*
+ * The search functionality.
+ *
+ * The search is byte oriented, which works nicely for UTF-8 as well as
+ * latin1 data
+ */
+
+#define ALPHABET_SIZE 256
+
+typedef struct _ac_node {
+#ifdef HARDDEBUG
+ Uint32 id; /* To identify h pointer targets when
+ dumping */
+#endif
+ Uint32 d; /* Depth in trie, also represents the
+ length (-1) of the matched string if
+ in final set */
+ Sint32 final; /* Members in final set represent
+ * matches.
+ * The set representation is scattered
+ * among the nodes in this way:
+ * >0 -> this represents a member of
+ * the final set, <0 -> member of
+ * final set somewhere in the failure
+ * chain,
+ * 0 -> not member of the final set */
+ struct _ac_node *h; /* h(Hode) is the failure function */
+ struct _ac_node *g[ALPHABET_SIZE]; /* g(Node,Character) is the
+ transition function */
+} ACNode;
+
+typedef struct _ac_trie {
+#ifdef HARDDEBUG
+ Uint32 idc;
+#endif
+ Uint32 counter; /* Number of added patterns */
+ ACNode *root; /* pointer to the root state */
+} ACTrie;
+
+typedef struct _bm_data {
+ byte *x;
+ Sint len;
+ Sint *goodshift;
+ Sint badshift[ALPHABET_SIZE];
+} BMData;
+
+#ifdef HARDDEBUG
+static void dump_bm_data(BMData *bm);
+static void dump_ac_trie(ACTrie *act);
+static void dump_ac_node(ACNode *node, int indent, int ch);
+#endif
+
+/*
+ * The needed size of binary data for a search structure - given the
+ * accumulated string lengths.
+ */
+#define BM_SIZE(StrLen) /* StrLen: length of searchstring */ \
+((MYALIGN(sizeof(Sint) * (StrLen))) + /* goodshift array */ \
+ MYALIGN(StrLen) + /* searchstring saved */ \
+ (MYALIGN(sizeof(BMData)))) /* Structure */
+
+#define AC_SIZE(StrLens) /* StrLens: sum of all searchstring lengths */ \
+((MYALIGN(sizeof(ACNode)) * \
+((StrLens)+1)) + /* The actual nodes (including rootnode) */ \
+ MYALIGN(sizeof(ACTrie))) /* Structure */
+
+
+#ifndef MAX
+#define MAX(A,B) (((A) > (B)) ? (A) : B)
+#endif
+
+/*
+ * Callback for the magic binary
+ */
+static void cleanup_my_data(Binary *bp)
+{
+ return;
+}
+
+/*
+ * Initiate a (allocated) micro allocator and fill in the base
+ * for an Aho-Corasick search trie, given the accumulated length of the search
+ * strings.
+ */
+static ACTrie *create_acdata(MyAllocator *my, Uint len,
+ ACNode ***qbuff /* out */,
+ Binary **the_bin /* out */)
+{
+ Uint datasize = AC_SIZE(len);
+ ACTrie *act;
+ ACNode *acn;
+ Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data);
+ byte *data = ERTS_MAGIC_BIN_DATA(mb);
+
+ init_my_allocator(my, datasize, data);
+ act = my_alloc(my, sizeof(ACTrie)); /* Important that this is the first
+ allocation */
+ act->counter = 0;
+ act->root = acn = my_alloc(my, sizeof(ACNode));
+ acn->d = 0;
+ acn->final = 0;
+ acn->h = NULL;
+ memset(acn->g, 0, sizeof(ACNode *) * ALPHABET_SIZE);
+#ifdef HARDDEBUG
+ act->idc = 0;
+ acn->id = 0;
+#endif
+ *qbuff = erts_alloc(ERTS_ALC_T_TMP, sizeof(ACNode *) * len);
+ *the_bin = mb;
+ return act;
+}
+
+/*
+ * The same initialization of allocator and basic data for Boyer-More.
+ */
+static BMData *create_bmdata(MyAllocator *my, byte *x, Uint len,
+ Binary **the_bin /* out */)
+{
+ Uint datasize = BM_SIZE(len);
+ BMData *bmd;
+ Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data);
+ byte *data = ERTS_MAGIC_BIN_DATA(mb);
+ init_my_allocator(my, datasize, data);
+ bmd = my_alloc(my, sizeof(BMData));
+ bmd->x = my_alloc(my,len);
+ memcpy(bmd->x,x,len);
+ bmd->len = len;
+ bmd->goodshift = my_alloc(my,sizeof(Uint) * len);
+ *the_bin = mb;
+ return bmd;
+}
+
+/*
+ * Compilation of search structures
+ */
+
+/*
+ * Aho Corasick - Build a Trie and fill in the failure functions
+ * when all strings are added.
+ * The algorithm is nicely described by Dieter B�hler of University of
+ * T�bingen:
+ * http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html
+ */
+
+/*
+ * Helper called once for each search pattern
+ */
+static void ac_add_one_pattern(MyAllocator *my, ACTrie *act, byte *x, Uint len)
+{
+ ACNode *acn = act->root;
+ Uint32 n = ++act->counter; /* Always increase conter, even if it's a
+ duplicate as this may identify the pattern
+ in the final set (not in current interface
+ though) */
+ Uint i = 0;
+
+ while(i < len) {
+ if (acn->g[x[i]] != NULL) {
+ /* node exists, continue */
+ acn = acn->g[x[i]];
+ ++i;
+ } else {
+ /* allocate a new node */
+ ACNode *nn = my_alloc(my,sizeof(ACNode));
+#ifdef HARDDEBUG
+ nn->id = ++(act->idc);
+#endif
+ nn->d = i+1;
+ nn->h = act->root;
+ nn->final = 0;
+ memset(nn->g, 0, sizeof(ACNode *) * ALPHABET_SIZE);
+ acn->g[x[i]] = nn;
+ ++i;
+ acn = nn;
+ }
+ }
+ if (acn->final == 0) { /* New pattern, add to final set */
+ acn->final = n;
+ }
+}
+
+/*
+ * Called when all search patterns are added.
+ */
+static void ac_compute_failure_functions(ACTrie *act, ACNode **qbuff)
+{
+ ACNode *root = act->root;
+ ACNode *parent;
+ int i;
+ int qh = 0,qt = 0;
+ ACNode *child, *r;
+
+ /* Set all children of the root to have the root as failure function */
+ for (i = 0; i < ALPHABET_SIZE; ++i) {
+ if (root->g[i] != NULL) {
+ root->g[i]->h = root;
+ /* Add to que for later traversal */
+ qbuff[qt++] = root->g[i];
+ }
+ }
+
+ /* So, now we've handled children of the root state, traverse the
+ rest of the trie BF... */
+ while (qh < qt) {
+ parent = qbuff[qh++];
+ for (i = 0; i < ALPHABET_SIZE; ++ i) {
+ if ((child = parent->g[i]) != NULL) {
+ /* Visit this node to */
+ qbuff[qt++] = child;
+ /* Search for correct failure function, follow the parents
+ failure function until you find a similar transition
+ funtion to this childs */
+ r = parent->h;
+ while (r != NULL && r->g[i] == NULL) {
+ r = r->h;
+ }
+ if (r == NULL) {
+ /* Replace NULL failures with the root as we go */
+ child->h = (root->g[i] == NULL) ? root : root->g[i];
+ } else {
+ child->h = r->g[i];
+ /*
+ * The "final" set is scattered among the nodes. When
+ * the failure function points to a member of the final
+ * set, we have a match, but we might not see it in the
+ * current node if we dont mark it as a special type of
+ * final, i.e. foolow the failure function and you will
+ * find a real member of final set. This is marked with
+ * a negative string id and only done if this node does
+ * not represent a member in the final set.
+ */
+ if (!(child->final) && (child->h->final)) {
+ child->final = -1;
+ }
+ }
+ }
+ }
+ }
+ /* Finally the failure function of the root should point to itself */
+ root->h = root;
+}
+
+
+/*
+ * The actual searching for needles in the haystack...
+ * Find first match using Aho-Coracick Trie
+ * return pattern number and fill in mpos + mlen if found, otherwise return 0
+ * Return the matching pattern that *starts* first, and ends
+ * last (difference when overlapping), hence the candidate thing.
+ * Basic AC finds the first end before the first start...
+ *
+ */
+typedef struct {
+ ACNode *q;
+ Uint pos;
+ Uint len;
+ ACNode *candidate;
+ Uint candidate_start;
+} ACFindFirstState;
+
+
+static void ac_init_find_first_match(ACFindFirstState *state, ACTrie *act, Sint startpos, Uint len)
+{
+ state->q = act->root;
+ state->pos = startpos;
+ state->len = len;
+ state->candidate = NULL;
+ state->candidate_start = 0;
+}
+#define AC_OK 0
+#define AC_NOT_FOUND -1
+#define AC_RESTART -2
+
+#define AC_LOOP_FACTOR 10
+
+static int ac_find_first_match(ACFindFirstState *state, byte *haystack,
+ Uint *mpos, Uint *mlen, Uint *reductions)
+{
+ ACNode *q = state->q;
+ Uint i = state->pos;
+ ACNode *candidate = state->candidate, *r;
+ Uint len = state->len;
+ Uint candidate_start = state->candidate_start;
+ Uint rstart;
+ register Uint reds = *reductions;
+
+ while (i < len) {
+ if (--reds == 0) {
+ state->q = q;
+ state->pos = i;
+ state->len = len;
+ state->candidate = candidate;
+ state->candidate_start = candidate_start;
+ return AC_RESTART;
+ }
+
+ while (q->g[haystack[i]] == NULL && q->h != q) {
+ q = q->h;
+ }
+ if (q->g[haystack[i]] != NULL) {
+ q = q->g[haystack[i]];
+ }
+#ifdef HARDDEBUG
+ erts_printf("ch = %c, Current: %u\n", (int) haystack[i], (unsigned) q->id);
+#endif
+ ++i;
+ if (candidate != NULL && (i - q->d) > candidate_start) {
+ break;
+ }
+ if (q->final) {
+ r = q;
+ while (r->final < 0)
+ r = r->h;
+ rstart = i - r->d;
+ if (candidate == NULL || rstart < candidate_start ||
+ (rstart == candidate_start && candidate->d < q->d)) {
+ candidate_start = rstart;
+ candidate = r;
+ }
+ }
+ }
+ *reductions = reds;
+ if (!candidate) {
+ return AC_NOT_FOUND;
+ }
+#ifdef HARDDEBUG
+ dump_ac_node(candidate,0,'?');
+#endif
+ *mpos = candidate_start;
+ *mlen = candidate->d;
+ return AC_OK;
+}
+
+typedef struct _findall_data {
+ Uint pos;
+ Uint len;
+#ifdef HARDDEBUG
+ Uint id;
+#endif
+ Eterm epos;
+ Eterm elen;
+} FindallData;
+
+typedef struct {
+ ACNode *q;
+ Uint pos;
+ Uint len;
+ Uint m;
+ Uint allocated;
+ FindallData *out;
+} ACFindAllState;
+
+static void ac_init_find_all(ACFindAllState *state, ACTrie *act, Sint startpos, Uint len)
+{
+ state->q = act->root;
+ state->pos = startpos;
+ state->len = len;
+ state->m = 0;
+ state->allocated = 0;
+ state->out = NULL;
+}
+
+static void ac_restore_find_all(ACFindAllState *state, char *buff)
+{
+ memcpy(state,buff,sizeof(ACFindAllState));
+ state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated));
+ memcpy(state->out,buff+sizeof(ACFindAllState),sizeof(FindallData)*state->m);
+}
+
+static void ac_serialize_find_all(ACFindAllState *state, char *buff)
+{
+ memcpy(buff,state,sizeof(ACFindAllState));
+ memcpy(buff+sizeof(ACFindAllState),state->out,sizeof(FindallData)*state->m);
+}
+
+static void ac_clean_find_all(ACFindAllState *state)
+{
+ if (state->out != NULL) {
+ erts_free(ERTS_ALC_T_TMP, state->out);
+ }
+#ifdef HARDDEBUG
+ state->out = NULL;
+ state->allocated = 0;
+#endif
+}
+
+#define SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(S) \
+ (sizeof(ACFindAllState)+(sizeof(FindallData)*(S).m))
+
+/*
+ * Differs to the find_first function in that it stores all matches and the values
+ * arte returned only in the state.
+ */
+static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack,
+ Uint *reductions)
+{
+ ACNode *q = state->q;
+ Uint i = state->pos;
+ Uint rstart;
+ ACNode *r;
+ Uint len = state->len;
+ Uint m = state->m, save_m;
+ Uint allocated = state->allocated;
+ FindallData *out = state->out;
+ register Uint reds = *reductions;
+
+
+ while (i < len) {
+ if (--reds == 0) {
+ state->q = q;
+ state->pos = i;
+ state->len = len;
+ state->m = m;
+ state->allocated = allocated;
+ state->out = out;
+ return AC_RESTART;
+ }
+ while (q->g[haystack[i]] == NULL && q->h != q) {
+ q = q->h;
+ }
+ if (q->g[haystack[i]] != NULL) {
+ q = q->g[haystack[i]];
+ }
+ ++i;
+ if (q->final) {
+ r = q;
+ while (r->final) {
+ while (r->final < 0)
+ r = r->h;
+#ifdef HARDDEBUG
+ erts_printf("Trying to add %u\n",(unsigned) r->final);
+#endif
+ rstart = i - r->d;
+ save_m = m;
+ while (m > 0 && (out[m-1].pos > rstart ||
+ (out[m-1].pos == rstart &&
+ out[m-1].len < r->d))) {
+#ifdef HARDDEBUG
+ erts_printf("Popping %u\n",(unsigned) out[m-1].id);
+#endif
+ --m;
+ }
+#ifdef HARDDEBUG
+ if (m > 0) {
+ erts_printf("Pos %u\n",out[m-1].pos);
+ erts_printf("Len %u\n",out[m-1].len);
+ }
+ erts_printf("Rstart %u\n",rstart);
+#endif
+ if (m == 0 || out[m-1].pos + out[m-1].len <= rstart) {
+ if (m >= allocated) {
+ if (!allocated) {
+ allocated = 10;
+ out = erts_alloc(ERTS_ALC_T_TMP,
+ sizeof(FindallData) * allocated);
+ } else {
+ allocated *= 2;
+ out = erts_realloc(ERTS_ALC_T_TMP, out,
+ sizeof(FindallData) *
+ allocated);
+ }
+ }
+ out[m].pos = rstart;
+ out[m].len = r->d;
+#ifdef HARDDEBUG
+ out[m].id = r->final;
+#endif
+ ++m;
+#ifdef HARDDEBUG
+ erts_printf("Pushing %u\n",(unsigned) out[m-1].id);
+#endif
+ } else {
+#ifdef HARDDEBUG
+ erts_printf("Backtracking %d steps\n",save_m - m);
+#endif
+ m = save_m;
+ }
+ r = r->h;
+ }
+ }
+ }
+ *reductions = reds;
+ state->m = m;
+ state->out = out;
+ return (m == 0) ? AC_NOT_FOUND : AC_OK;
+}
+
+/*
+ * Boyer More - most obviously implemented more or less exactly as
+ * Christian Charras and Thierry Lecroq describes it in "Handbook of
+ * Exact String-Matching Algorithms"
+ * http://www-igm.univ-mlv.fr/~lecroq/string/
+ */
+
+/*
+ * Call this to compute badshifts array
+ */
+static void compute_badshifts(BMData *bmd)
+{
+ Sint i;
+ Sint m = bmd->len;
+
+ for (i = 0; i < ALPHABET_SIZE; ++i) {
+ bmd->badshift[i] = m;
+ }
+ for (i = 0; i < m - 1; ++i) {
+ bmd->badshift[bmd->x[i]] = m - i - 1;
+ }
+}
+
+/* Helper for "compute_goodshifts" */
+static void compute_suffixes(byte *x, Sint m, Sint *suffixes)
+{
+ int f,g,i;
+
+ suffixes[m - 1] = m;
+
+ f = 0; /* To avoid use before set warning */
+
+ g = m - 1;
+
+ for (i = m - 2; i >= 0; --i) {
+ if (i > g && suffixes[i + m - f] < i - g) {
+ suffixes[i] = suffixes[i + m - 1 - f];
+ } else {
+ if (i < g) {
+ g = i;
+ }
+ f = i;
+ while ( g >= 0 && x[g] == x[g + m - 1 - f] ) {
+ --g;
+ }
+ suffixes[i] = f - g;
+ }
+ }
+}
+
+/*
+ * Call this to compute goodshift array
+ */
+static void compute_goodshifts(BMData *bmd)
+{
+ Sint m = bmd->len;
+ byte *x = bmd->x;
+ Sint i, j;
+ Sint *suffixes = erts_alloc(ERTS_ALC_T_TMP, m * sizeof(Uint));
+
+ compute_suffixes(x, m, suffixes);
+
+ for (i = 0; i < m; ++i) {
+ bmd->goodshift[i] = m;
+ }
+
+ j = 0;
+
+ for (i = m - 1; i >= -1; --i) {
+ if (i == -1 || suffixes[i] == i + 1) {
+ while (j < m - 1 - i) {
+ if (bmd->goodshift[j] == m) {
+ bmd->goodshift[j] = m - 1 - i;
+ }
+ ++j;
+ }
+ }
+ }
+ for (i = 0; i <= m - 2; ++i) {
+ bmd->goodshift[m - 1 - suffixes[i]] = m - 1 - i;
+ }
+ erts_free(ERTS_ALC_T_TMP, suffixes);
+}
+
+typedef struct {
+ Sint pos;
+ Sint len;
+} BMFindFirstState;
+
+#define BM_OK 0 /* used only for find_all */
+#define BM_NOT_FOUND -1
+#define BM_RESTART -2
+#define BM_LOOP_FACTOR 10 /* Should we have a higher value? */
+
+static void bm_init_find_first_match(BMFindFirstState *state, Sint startpos,
+ Uint len)
+{
+ state->pos = startpos;
+ state->len = (Sint) len;
+}
+
+
+static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd,
+ byte *haystack, Uint *reductions)
+{
+ Sint blen = bmd->len;
+ Sint len = state->len;
+ Sint *gs = bmd->goodshift;
+ Sint *bs = bmd->badshift;
+ byte *needle = bmd->x;
+ Sint i;
+ Sint j = state->pos;
+ register Uint reds = *reductions;
+
+ while (j <= len - blen) {
+ if (--reds == 0) {
+ state->pos = j;
+ return BM_RESTART;
+ }
+ for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i)
+ ;
+ if (i < 0) { /* found */
+ *reductions = reds;
+ return j;
+ }
+ j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i);
+ }
+ *reductions = reds;
+ return BM_NOT_FOUND;
+}
+
+typedef struct {
+ Sint pos;
+ Sint len;
+ Uint m;
+ Uint allocated;
+ FindallData *out;
+} BMFindAllState;
+
+static void bm_init_find_all(BMFindAllState *state, Sint startpos, Uint len)
+{
+ state->pos = startpos;
+ state->len = (Sint) len;
+ state->m = 0;
+ state->allocated = 0;
+ state->out = NULL;
+}
+
+static void bm_restore_find_all(BMFindAllState *state, char *buff)
+{
+ memcpy(state,buff,sizeof(BMFindAllState));
+ state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) *
+ (state->allocated));
+ memcpy(state->out,buff+sizeof(BMFindAllState),
+ sizeof(FindallData)*state->m);
+}
+
+static void bm_serialize_find_all(BMFindAllState *state, char *buff)
+{
+ memcpy(buff,state,sizeof(BMFindAllState));
+ memcpy(buff+sizeof(BMFindAllState),state->out,
+ sizeof(FindallData)*state->m);
+}
+
+static void bm_clean_find_all(BMFindAllState *state)
+{
+ if (state->out != NULL) {
+ erts_free(ERTS_ALC_T_TMP, state->out);
+ }
+#ifdef HARDDEBUG
+ state->out = NULL;
+ state->allocated = 0;
+#endif
+}
+
+#define SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(S) \
+ (sizeof(BMFindAllState)+(sizeof(FindallData)*(S).m))
+
+/*
+ * Differs to the find_first function in that it stores all matches and the
+ * values are returned only in the state.
+ */
+static Sint bm_find_all_non_overlapping(BMFindAllState *state,
+ BMData *bmd, byte *haystack,
+ Uint *reductions)
+{
+ Sint blen = bmd->len;
+ Sint len = state->len;
+ Sint *gs = bmd->goodshift;
+ Sint *bs = bmd->badshift;
+ byte *needle = bmd->x;
+ Sint i;
+ Sint j = state->pos;
+ Uint m = state->m;
+ Uint allocated = state->allocated;
+ FindallData *out = state->out;
+ register Uint reds = *reductions;
+
+ while (j <= len - blen) {
+ if (--reds == 0) {
+ state->pos = j;
+ state->m = m;
+ state->allocated = allocated;
+ state->out = out;
+ return BM_RESTART;
+ }
+ for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i)
+ ;
+ if (i < 0) { /* found */
+ if (m >= allocated) {
+ if (!allocated) {
+ allocated = 10;
+ out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * allocated);
+ } else {
+ allocated *= 2;
+ out = erts_realloc(ERTS_ALC_T_TMP, out,
+ sizeof(FindallData) * allocated);
+ }
+ }
+ out[m].pos = j;
+ out[m].len = blen;
+ ++m;
+ j += blen;
+ } else {
+ j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i);
+ }
+ }
+ state->m = m;
+ state->out = out;
+ *reductions = reds;
+ return (m == 0) ? BM_NOT_FOUND : BM_OK;
+}
+
+/*
+ * Interface functions (i.e. "bif's")
+ */
+
+/*
+ * Search functionality interfaces
+ */
+
+static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp)
+{
+ Eterm t, b, comp_term = NIL;
+ Uint characters;
+ Uint words;
+
+ characters = 0;
+ words = 0;
+
+ if (is_list(argument)) {
+ t = argument;
+ while (is_list(t)) {
+ b = CAR(list_val(t));
+ t = CDR(list_val(t));
+ if (!is_binary(b)) {
+ goto badarg;
+ }
+ if (binary_bitsize(b) != 0) {
+ goto badarg;
+ }
+ ++words;
+ characters += binary_size(b);
+ }
+ if (is_not_nil(t)) {
+ goto badarg;
+ }
+ if (words > 1) {
+ comp_term = argument;
+ } else {
+ comp_term = CAR(list_val(argument));
+ }
+ } else if (is_binary(argument)) {
+ if (binary_bitsize(argument) != 0) {
+ goto badarg;
+ }
+ words = 1;
+ comp_term = argument;
+ characters = binary_size(argument);
+ }
+
+ if (characters == 0) {
+ goto badarg;
+ }
+ ASSERT(words > 0);
+
+ if (words == 1) {
+ byte *bytes;
+ Uint bitoffs, bitsize;
+ byte *temp_alloc = NULL;
+ MyAllocator my;
+ BMData *bmd;
+ Binary *bin;
+
+ ERTS_GET_BINARY_BYTES(comp_term, bytes, bitoffs, bitsize);
+ if (bitoffs != 0) {
+ bytes = erts_get_aligned_binary_bytes(comp_term, &temp_alloc);
+ }
+ bmd = create_bmdata(&my, bytes, characters, &bin);
+ compute_badshifts(bmd);
+ compute_goodshifts(bmd);
+ erts_free_aligned_binary_bytes(temp_alloc);
+ CHECK_ALLOCATOR(my);
+ *tag = am_bm;
+ *binp = bin;
+ return 0;
+ } else {
+ ACTrie *act;
+ MyAllocator my;
+ ACNode **qbuff;
+ Binary *bin;
+
+ act = create_acdata(&my, characters, &qbuff, &bin);
+ t = comp_term;
+ while (is_list(t)) {
+ byte *bytes;
+ Uint bitoffs, bitsize;
+ byte *temp_alloc = NULL;
+ b = CAR(list_val(t));
+ t = CDR(list_val(t));
+ ERTS_GET_BINARY_BYTES(b, bytes, bitoffs, bitsize);
+ if (bitoffs != 0) {
+ bytes = erts_get_aligned_binary_bytes(b, &temp_alloc);
+ }
+ ac_add_one_pattern(&my,act,bytes,binary_size(b));
+ erts_free_aligned_binary_bytes(temp_alloc);
+ }
+ ac_compute_failure_functions(act,qbuff);
+ CHECK_ALLOCATOR(my);
+ erts_free(ERTS_ALC_T_TMP,qbuff);
+ *tag = am_ac;
+ *binp = bin;
+ return 0;
+ }
+ badarg:
+ return -1;
+}
+
+BIF_RETTYPE binary_compile_pattern_1(BIF_ALIST_1)
+{
+ Binary *bin;
+ Eterm tag, ret;
+ Eterm *hp;
+
+ if (do_binary_match_compile(BIF_ARG_1,&tag,&bin)) {
+ BIF_ERROR(BIF_P,BADARG);
+ }
+ hp = HAlloc(BIF_P, PROC_BIN_SIZE+3);
+ ret = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin);
+ ret = TUPLE2(hp, tag, ret);
+ BIF_RET(ret);
+}
+
+#define DO_BIN_MATCH_OK 0
+#define DO_BIN_MATCH_BADARG -1
+#define DO_BIN_MATCH_RESTART -2
+
+static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen,
+ Eterm type, Binary *bin, Eterm state_term,
+ Eterm *res_term)
+{
+ byte *bytes;
+ Uint bitoffs, bitsize;
+ byte *temp_alloc = NULL;
+
+ ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize);
+ if (bitsize != 0) {
+ goto badarg;
+ }
+ if (bitoffs != 0) {
+ bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc);
+ }
+ if (state_term != NIL) {
+ Eterm *ptr = big_val(state_term);
+ type = ptr[1];
+ }
+
+ if (type == am_bm) {
+ BMData *bm;
+ Sint pos;
+ Eterm ret;
+ Eterm *hp;
+ BMFindFirstState state;
+ Uint reds = get_reds(p, BM_LOOP_FACTOR);
+ Uint save_reds = reds;
+
+ bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin);
+#ifdef HARDDEBUG
+ dump_bm_data(bm);
+#endif
+ if (state_term == NIL) {
+ bm_init_find_first_match(&state, hsstart, hslen);
+ } else {
+ Eterm *ptr = big_val(state_term);
+ memcpy(&state,ptr+2,sizeof(state));
+ }
+#ifdef HARDDEBUG
+ erts_printf("(bm) state->pos = %ld, state->len = %lu\n",state.pos,
+ state.len);
+#endif
+ pos = bm_find_first_match(&state, bm, bytes, &reds);
+ if (pos == BM_NOT_FOUND) {
+ ret = am_nomatch;
+ } else if (pos == BM_RESTART) {
+ int x = (sizeof(BMFindFirstState) / sizeof(Eterm)) +
+ !!(sizeof(BMFindFirstState) % sizeof(Eterm));
+#ifdef HARDDEBUG
+ erts_printf("Trap bm!\n");
+#endif
+ hp = HAlloc(p,x+2);
+ hp[0] = make_pos_bignum_header(x+1);
+ hp[1] = type;
+ memcpy(hp+2,&state,sizeof(state));
+ *res_term = make_big(hp);
+ erts_free_aligned_binary_bytes(temp_alloc);
+ return DO_BIN_MATCH_RESTART;
+ } else {
+ Eterm erlen = erts_make_integer((Uint) bm->len, p);
+ ret = erts_make_integer(pos,p);
+ hp = HAlloc(p,3);
+ ret = TUPLE2(hp, ret, erlen);
+ }
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR);
+ *res_term = ret;
+ return DO_BIN_MATCH_OK;
+ } else if (type == am_ac) {
+ ACTrie *act;
+ Uint pos, rlen;
+ int acr;
+ ACFindFirstState state;
+ Eterm ret;
+ Eterm *hp;
+ Uint reds = get_reds(p, AC_LOOP_FACTOR);
+ Uint save_reds = reds;
+
+ act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin);
+#ifdef HARDDEBUG
+ dump_ac_trie(act);
+#endif
+ if (state_term == NIL) {
+ ac_init_find_first_match(&state, act, hsstart, hslen);
+ } else {
+ Eterm *ptr = big_val(state_term);
+ memcpy(&state,ptr+2,sizeof(state));
+ }
+ acr = ac_find_first_match(&state, bytes, &pos, &rlen, &reds);
+ if (acr == AC_NOT_FOUND) {
+ ret = am_nomatch;
+ } else if (acr == AC_RESTART) {
+ int x = (sizeof(state) / sizeof(Eterm)) +
+ !!(sizeof(BMFindFirstState) % sizeof(Eterm));
+#ifdef HARDDEBUG
+ erts_printf("Trap ac!\n");
+#endif
+ hp = HAlloc(p,x+2);
+ hp[0] = make_pos_bignum_header(x+1);
+ hp[1] = type;
+ memcpy(hp+2,&state,sizeof(state));
+ *res_term = make_big(hp);
+ erts_free_aligned_binary_bytes(temp_alloc);
+ return DO_BIN_MATCH_RESTART;
+ } else {
+ Eterm epos = erts_make_integer(pos+hsstart,p);
+ Eterm erlen = erts_make_integer(rlen,p);
+ hp = HAlloc(p,3);
+ ret = TUPLE2(hp, epos, erlen);
+ }
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR);
+ *res_term = ret;
+ return DO_BIN_MATCH_OK;
+ }
+ badarg:
+ return DO_BIN_MATCH_BADARG;
+}
+
+static int do_binary_matches(Process *p, Eterm subject, Uint hsstart,
+ Uint hslen, Eterm type, Binary *bin,
+ Eterm state_term, Eterm *res_term)
+{
+ byte *bytes;
+ Uint bitoffs, bitsize;
+ byte *temp_alloc = NULL;
+
+ ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize);
+ if (bitsize != 0) {
+ goto badarg;
+ }
+ if (bitoffs != 0) {
+ bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc);
+ }
+ if (state_term != NIL) {
+ Eterm *ptr = big_val(state_term);
+ type = ptr[1];
+ }
+
+ if (type == am_bm) {
+ BMData *bm;
+ Sint pos;
+ Eterm ret,tpl;
+ Eterm *hp;
+ BMFindAllState state;
+ Uint reds = get_reds(p, BM_LOOP_FACTOR);
+ Uint save_reds = reds;
+
+ bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin);
+#ifdef HARDDEBUG
+ dump_bm_data(bm);
+#endif
+ if (state_term == NIL) {
+ bm_init_find_all(&state, hsstart, hslen);
+ } else {
+ Eterm *ptr = big_val(state_term);
+ bm_restore_find_all(&state,(char *) (ptr+2));
+ }
+
+ pos = bm_find_all_non_overlapping(&state, bm, bytes, &reds);
+ if (pos == BM_NOT_FOUND) {
+ ret = NIL;
+ } else if (pos == BM_RESTART) {
+ int x =
+ (SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) +
+ !!(SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm));
+#ifdef HARDDEBUG
+ erts_printf("Trap bm!\n");
+#endif
+ hp = HAlloc(p,x+2);
+ hp[0] = make_pos_bignum_header(x+1);
+ hp[1] = type;
+ bm_serialize_find_all(&state, (char *) (hp+2));
+ *res_term = make_big(hp);
+ erts_free_aligned_binary_bytes(temp_alloc);
+ bm_clean_find_all(&state);
+ return DO_BIN_MATCH_RESTART;
+ } else {
+ FindallData *fad = state.out;
+ int i;
+ for (i = 0; i < state.m; ++i) {
+ fad[i].epos = erts_make_integer(fad[i].pos,p);
+ fad[i].elen = erts_make_integer(fad[i].len,p);
+ }
+ hp = HAlloc(p,state.m * (3 + 2));
+ ret = NIL;
+ for (i = state.m - 1; i >= 0; --i) {
+ tpl = TUPLE2(hp, fad[i].epos, fad[i].elen);
+ hp +=3;
+ ret = CONS(hp,tpl,ret);
+ hp += 2;
+ }
+ }
+ erts_free_aligned_binary_bytes(temp_alloc);
+ bm_clean_find_all(&state);
+ BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR);
+ *res_term = ret;
+ return DO_BIN_MATCH_OK;
+ } else if (type == am_ac) {
+ ACTrie *act;
+ int acr;
+ ACFindAllState state;
+ Eterm ret,tpl;
+ Eterm *hp;
+ Uint reds = get_reds(p, AC_LOOP_FACTOR);
+ Uint save_reds = reds;
+
+ act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin);
+#ifdef HARDDEBUG
+ dump_ac_trie(act);
+#endif
+ if (state_term == NIL) {
+ ac_init_find_all(&state, act, hsstart, hslen);
+ } else {
+ Eterm *ptr = big_val(state_term);
+ ac_restore_find_all(&state,(char *) (ptr+2));
+ }
+ acr = ac_find_all_non_overlapping(&state, bytes, &reds);
+ if (acr == AC_NOT_FOUND) {
+ ret = NIL;
+ } else if (acr == AC_RESTART) {
+ int x =
+ (SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) +
+ !!(SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm));
+#ifdef HARDDEBUG
+ erts_printf("Trap ac!\n");
+#endif
+ hp = HAlloc(p,x+2);
+ hp[0] = make_pos_bignum_header(x+1);
+ hp[1] = type;
+ ac_serialize_find_all(&state, (char *) (hp+2));
+ *res_term = make_big(hp);
+ erts_free_aligned_binary_bytes(temp_alloc);
+ ac_clean_find_all(&state);
+ return DO_BIN_MATCH_RESTART;
+ } else {
+ FindallData *fad = state.out;
+ int i;
+ for (i = 0; i < state.m; ++i) {
+ fad[i].epos = erts_make_integer(fad[i].pos,p);
+ fad[i].elen = erts_make_integer(fad[i].len,p);
+ }
+ hp = HAlloc(p,state.m * (3 + 2));
+ ret = NIL;
+ for (i = state.m - 1; i >= 0; --i) {
+ tpl = TUPLE2(hp, fad[i].epos, fad[i].elen);
+ hp +=3;
+ ret = CONS(hp,tpl,ret);
+ hp += 2;
+ }
+ }
+ erts_free_aligned_binary_bytes(temp_alloc);
+ ac_clean_find_all(&state);
+ BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR);
+ *res_term = ret;
+ return DO_BIN_MATCH_OK;
+ }
+ badarg:
+ return DO_BIN_MATCH_BADARG;
+}
+
+static BIF_RETTYPE binary_match_trap(BIF_ALIST_3)
+{
+ int runres;
+ Eterm result;
+ Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val;
+ runres = do_binary_match(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result);
+ switch (runres) {
+ case DO_BIN_MATCH_OK:
+ BIF_RET(result);
+ case DO_BIN_MATCH_RESTART:
+ BUMP_ALL_REDS(BIF_P);
+ BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result,
+ BIF_ARG_3);
+ default:
+ goto badarg;
+ }
+ badarg:
+ BIF_ERROR(BIF_P,BADARG);
+}
+
+static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3)
+{
+ int runres;
+ Eterm result;
+ Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val;
+ runres = do_binary_matches(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result);
+ switch (runres) {
+ case DO_BIN_MATCH_OK:
+ BIF_RET(result);
+ case DO_BIN_MATCH_RESTART:
+ BUMP_ALL_REDS(BIF_P);
+ BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result,
+ BIF_ARG_3);
+ default:
+ goto badarg;
+ }
+ badarg:
+ BIF_ERROR(BIF_P,BADARG);
+}
+
+
+BIF_RETTYPE binary_match_3(BIF_ALIST_3)
+{
+ Uint hsstart, hslen;
+ Eterm *tp;
+ Eterm type;
+ Binary *bin;
+ Eterm bin_term = NIL;
+ int runres;
+ Eterm result;
+
+ if (is_not_binary(BIF_ARG_1)) {
+ goto badarg;
+ }
+ if (BIF_ARG_3 == ((Eterm) 0)) {
+ /* Invalid term, we're called from binary_match_2... */
+ hsstart = 0;
+ hslen = binary_size(BIF_ARG_1);
+ } else if (is_list(BIF_ARG_3)) {
+ Eterm l = BIF_ARG_3;
+ while(is_list(l)) {
+ Eterm t = CAR(list_val(l));
+ if (!is_tuple(t)) {
+ goto badarg;
+ }
+ tp = tuple_val(t);
+ if (arityval(*tp) != 2) {
+ goto badarg;
+ }
+ if (!term_to_Uint(tp[1], &hsstart) ||
+ ((hsstart >> 16) >> 15) != 0) {
+ goto badarg;
+ }
+ if (!term_to_Uint(tp[2], &hslen) ||
+ ((hslen >> 16) >> 15) != 0) {
+ goto badarg;
+ }
+ if (hslen < hsstart) {
+ goto badarg;
+ }
+ if (hslen > binary_size(BIF_ARG_1)-1) {
+ goto badarg; /* XXX:PaN or should we take as much as we have ? */
+ }
+ hslen = hslen + 1 - hsstart;
+ l = CDR(list_val(l));
+ }
+ } else if (BIF_ARG_3 != NIL) {
+ goto badarg;
+ }
+ if (hslen == 0) {
+ BIF_RET(am_nomatch);
+ }
+ if (is_tuple(BIF_ARG_2)) {
+ tp = tuple_val(BIF_ARG_2);
+ if (arityval(*tp) != 2 || is_not_atom(tp[1])) {
+ goto badarg;
+ }
+ if (((tp[1] != am_bm) && (tp[1] != am_ac)) ||
+ !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) {
+ goto badarg;
+ }
+ type = tp[1];
+ bin = ((ProcBin *) binary_val(tp[2]))->val;
+ if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) {
+ goto badarg;
+ }
+ bin_term = tp[2];
+ } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) {
+ goto badarg;
+ }
+ runres = do_binary_match(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result);
+ if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) {
+ Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE);
+ bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin);
+ } else if (bin_term == NIL) {
+ erts_bin_free(bin);
+ }
+ switch (runres) {
+ case DO_BIN_MATCH_OK:
+ BIF_RET(result);
+ case DO_BIN_MATCH_RESTART:
+ BUMP_ALL_REDS(BIF_P);
+ BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, bin_term);
+ default:
+ goto badarg;
+ }
+ badarg:
+ BIF_ERROR(BIF_P,BADARG);
+}
+
+BIF_RETTYPE binary_matches_3(BIF_ALIST_3)
+{
+ Uint hsstart, hslen;
+ Eterm *tp;
+ Eterm type;
+ Binary *bin;
+ Eterm bin_term = NIL;
+ int runres;
+ Eterm result;
+
+ if (is_not_binary(BIF_ARG_1)) {
+ goto badarg;
+ }
+ if (BIF_ARG_3 == ((Eterm) 0)) {
+ /* Invalid term, we're called from binary_matches_2... */
+ hsstart = 0;
+ hslen = binary_size(BIF_ARG_1);
+ } else if (is_list(BIF_ARG_3)) {
+ Eterm l = BIF_ARG_3;
+ while(is_list(l)) {
+ Eterm t = CAR(list_val(l));
+ if (!is_tuple(t)) {
+ goto badarg;
+ }
+ tp = tuple_val(t);
+ if (arityval(*tp) != 2) {
+ goto badarg;
+ }
+ if (!term_to_Uint(tp[1], &hsstart) ||
+ ((hsstart >> 16) >> 15) != 0) {
+ goto badarg;
+ }
+ if (!term_to_Uint(tp[2], &hslen) ||
+ ((hslen >> 16) >> 15) != 0) {
+ goto badarg;
+ }
+ if (hslen < hsstart) {
+ goto badarg;
+ }
+ if (hslen > binary_size(BIF_ARG_1)-1) {
+ goto badarg; /* XXX:PaN or should we take as much as we
+ have ? */
+ }
+ hslen = hslen + 1 - hsstart;
+ l = CDR(list_val(l));
+ }
+ } else if (BIF_ARG_3 != NIL) {
+ goto badarg;
+ }
+ if (hslen == 0) {
+ BIF_RET(am_nomatch);
+ }
+ if (is_tuple(BIF_ARG_2)) {
+ tp = tuple_val(BIF_ARG_2);
+ if (arityval(*tp) != 2 || is_not_atom(tp[1])) {
+ goto badarg;
+ }
+ if (((tp[1] != am_bm) && (tp[1] != am_ac)) ||
+ !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) {
+ goto badarg;
+ }
+ type = tp[1];
+ bin = ((ProcBin *) binary_val(tp[2]))->val;
+ if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) {
+ goto badarg;
+ }
+ bin_term = tp[2];
+ } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) {
+ goto badarg;
+ }
+ runres = do_binary_matches(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,
+ NIL,&result);
+ if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) {
+ Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE);
+ bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin);
+ } else if (bin_term == NIL) {
+ erts_bin_free(bin);
+ }
+ switch (runres) {
+ case DO_BIN_MATCH_OK:
+ BIF_RET(result);
+ case DO_BIN_MATCH_RESTART:
+ BUMP_ALL_REDS(BIF_P);
+ BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result,
+ bin_term);
+ default:
+ goto badarg;
+ }
+ badarg:
+ BIF_ERROR(BIF_P,BADARG);
+}
+
+
+BIF_RETTYPE binary_match_2(BIF_ALIST_2)
+{
+ return binary_match_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0));
+}
+
+
+BIF_RETTYPE binary_matches_2(BIF_ALIST_2)
+{
+ return binary_matches_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0));
+}
+
+/*
+ * Hard debug functions (dump) for the search structures
+ */
+
+#ifdef HARDDEBUG
+static void dump_bm_data(BMData *bm)
+{
+ int i,j;
+ erts_printf("Dumping Boyer-More structure.\n");
+ erts_printf("=============================\n");
+ erts_printf("Searchstring [%ld]:\n", bm->len);
+ erts_printf("<<");
+ for (i = 0; i < bm->len; ++i) {
+ if (i > 0) {
+ erts_printf(", ");
+ }
+ erts_printf("%d", (int) bm->x[i]);
+ if (bm->x[i] >= 'A') {
+ erts_printf(" ($%c)",(char) bm->x[i]);
+ }
+ }
+ erts_printf(">>\n");
+ erts_printf("GoodShift array:\n");
+ for (i = 0; i < bm->len; ++i) {
+ erts_printf("GoodShift[%d]: %ld\n", i, bm->goodshift[i]);
+ }
+ erts_printf("BadShift array:\n");
+ j = 0;
+ for (i = 0; i < ALPHABET_SIZE; i += j) {
+ for (j = 0; i + j < ALPHABET_SIZE && j < 6; ++j) {
+ erts_printf("BS[%03d]:%02ld, ", i+j, bm->badshift[i+j]);
+ }
+ erts_printf("\n");
+ }
+}
+
+static void dump_ac_node(ACNode *node, int indent, int ch) {
+ int i;
+ char *spaces = erts_alloc(ERTS_ALC_T_TMP, 10 * indent + 1);
+ memset(spaces,' ',10*indent);
+ spaces[10*indent] = '\0';
+ erts_printf("%s-> %c\n",spaces,ch);
+ erts_printf("%sId: %u\n",spaces,(unsigned) node->id);
+ erts_printf("%sD: %u\n",spaces,(unsigned)node->d);
+ erts_printf("%sFinal: %d\n",spaces,(int)node->final);
+ erts_printf("%sFail: %u\n",spaces,(unsigned)node->h->id);
+ erts_free(ERTS_ALC_T_TMP,spaces);
+ for(i=0;i<ALPHABET_SIZE;++i) {
+ if (node->g[i] != NULL && node->g[i] != node) {
+ dump_ac_node(node->g[i],indent+1,i);
+ }
+ }
+}
+
+
+static void dump_ac_trie(ACTrie *act)
+{
+ erts_printf("Aho Corasick Trie dump.\n");
+ erts_printf("=======================\n");
+ erts_printf("Node counter: %u\n", (unsigned) act->idc);
+ erts_printf("Searchstring counter: %u\n", (unsigned) act->counter);
+ erts_printf("Trie:\n");
+ dump_ac_node(act->root, 0, '0');
+ return;
+}
+#endif