From fdc8980231b1e791ec4b8f8f3d61a7ba7dda539b Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 2 Nov 2009 15:31:21 +0100 Subject: Initial commit of the binary EEP --- erts/emulator/beam/binary.c | 976 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 976 insertions(+) (limited to 'erts/emulator/beam/binary.c') diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 59c20398d5..29c1af2114 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -675,3 +675,979 @@ bitstr_list_len(Eterm obj) DESTROY_ESTACK(s); return (Sint) -1; } + +#define HARDDEBUG + +/* + * The native implementation functions for the module binary. + * Searching is implemented using aither Boyer-More or Aho-Corasick + * depending on number of searchstrings (BM if one, AC if more than one). + * Native implementation is for efficiency, nothing really *needs* to be + * implemented in native code. + */ + +/* + * A micro allocator used when building search structures, just a convenience + * for building structures inside a pre alocated magic binary using conventional + * malloc-like interface. + */ + +#define MYALIGN(Size) (SIZEOF_VOID_P * (((Size) / SIZEOF_VOID_P) + \ + !!(((Size) % SIZEOF_VOID_P)))) + +#ifdef DEBUG +#define CHECK_ALLOCATOR(My) ASSERT((My).current <= ((My).mem + (My).size)) +#else +#define CHECK_ALLOCATOR(My) /* nothing */ +#endif + +typedef struct _my_allocator { + Uint size; + byte *current; + byte *mem; +} MyAllocator; + +static void init_my_allocator(MyAllocator *my, Uint siz, byte *array) +{ + ASSERT((siz % SIZEOF_VOID_P) == 0); + my->size = siz; + my->mem = array; + my->current = my->mem; +} + +static void *my_alloc(MyAllocator *my, Uint size) +{ + void *ptr = my->current; + my->current += MYALIGN(size); + return ptr; +} + +/* + * The search functionality. + * + * The search is byte oriented, which works nicely for UTF-8 as well as latin1 data + */ + +#define ALPHABET_SIZE 256 + +typedef struct _ac_node { +#ifdef HARDDEBUG + Uint32 id; /* To identify h pointer targets when dumping */ +#endif + Uint32 d; /* Depth in trie, also represents the length + (-1) of the matched string if in + final set */ + Sint32 final; /* Members in final set represent matches. + * The set representation is scattered + * among the nodes in this way: + * >0 -> this represents a member of + * the final set, <0 -> member of + * final set somewhere in the failure chain, + * 0 -> not member of the final set */ + struct _ac_node *h; /* h(Hode) is the failure function */ + struct _ac_node *g[ALPHABET_SIZE]; /* g(Node,Character) is the + transition function */ +} ACNode; + +typedef struct _ac_trie { +#ifdef HARDDEBUG + Uint32 idc; +#endif + Uint32 counter; /* Number of added patterns */ + ACNode *root; /* pointer to the root state */ +} ACTrie; + +typedef struct _bm_data { + int ret_tuple; + byte *x; + Sint len; + Sint *goodshift; + Sint badshift[ALPHABET_SIZE]; +} BMData; + +#ifdef HARDDEBUG +static void dump_bm_data(BMData *bm); +static void dump_ac_trie(ACTrie *act); +static void dump_ac_node(ACNode *node, int indent, int ch); +#endif + +/* + * The needed size of binary data for a search structure - given the accumulated + * string lengths. + */ +#define BM_SIZE(StrLen) /* StrLen: length of searchstring */ \ +((MYALIGN(sizeof(Sint) * (StrLen))) + /* goodshift array */ \ + MYALIGN(StrLen) + /* searchstring saved */ \ + (MYALIGN(sizeof(BMData)))) /* Structure */ + +#define AC_SIZE(StrLens) /* StrLens: sum of all searchstring lengths */ \ +((MYALIGN(sizeof(ACNode)) * \ +((StrLens)+1)) + /* The actual nodes (including rootnode) */ \ + MYALIGN(sizeof(ACTrie))) /* Structure */ + + +#ifndef MAX +#define MAX(A,B) (((A) > (B)) ? (A) : B) +#endif + +/* + * Callback for the magic binary + */ +static void cleanup_my_data(Binary *bp) +{ + return; +} + +/* + * Initiate a (allocated) micro allocator and fill in the base + * for an Aho-Corasick search trie, given the accumulated length of the search strings. + */ +static ACTrie *create_acdata(MyAllocator *my, Process *p, Uint len, + ACNode ***qbuff /* out */,Eterm *the_bin /* out */) +{ + Uint datasize = AC_SIZE(len); + ACTrie *act; + ACNode *acn; + Eterm *hp; + Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); + byte *data = ERTS_MAGIC_BIN_DATA(mb); + + init_my_allocator(my, datasize, data); + act = my_alloc(my, sizeof(ACTrie)); /* Important that this is the first + allocation */ + act->counter = 0; + act->root = acn = my_alloc(my, sizeof(ACNode)); + acn->d = 0; + acn->final = 0; + acn->h = NULL; + memset(acn->g, 0, sizeof(ACNode *) * ALPHABET_SIZE); +#ifdef HARDDEBUG + act->idc = 0; + acn->id = 0; +#endif + *qbuff = erts_alloc(ERTS_ALC_T_TMP, sizeof(ACNode *) * len); + hp = HAlloc(p, PROC_BIN_SIZE); + *the_bin = erts_mk_magic_binary_term(&hp, &MSO(p), mb); + return act; +} + +/* + * The same initialization of allocator and basic data for Boyer-More. + */ +static BMData *create_bmdata(MyAllocator *my, Process *p, byte *x, Uint len, Eterm *the_bin /* out */) +{ + Uint datasize = BM_SIZE(len); + BMData *bmd; + Eterm *hp; + Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); + byte *data = ERTS_MAGIC_BIN_DATA(mb); + init_my_allocator(my, datasize, data); + bmd = my_alloc(my, sizeof(BMData)); + bmd->x = my_alloc(my,len); + memcpy(bmd->x,x,len); + bmd->len = len; + bmd->goodshift = my_alloc(my,sizeof(Uint) * len); + bmd->ret_tuple = 0; + hp = HAlloc(p, PROC_BIN_SIZE); + *the_bin = erts_mk_magic_binary_term(&hp, &MSO(p), mb); + return bmd; +} + +/* + * Compilation of search structures + */ + +/* + * Aho Corasick - Build a Trie and fill in the failure functions + * when all strings are added. + * The algorithm is nicely described by Dieter Bühler of University of Tübingen: + * http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html + */ + +/* + * Helper called ance for each search pattern + */ +static void ac_add_one_pattern(MyAllocator *my, ACTrie *act, byte *x, Uint len) +{ + ACNode *acn = act->root; + Uint32 n = ++act->counter; /* Always increase conter, even if it's a duplicate + as this shall identify the pattern in the + final set and eventually be returned to + the caller (in Erlang) */ + Uint i = 0; + + while(i < len) { + if (acn->g[x[i]] != NULL) { + /* node exists, continue */ + acn = acn->g[x[i]]; + ++i; + } else { + /* allocate a new node */ + ACNode *nn = my_alloc(my,sizeof(ACNode)); +#ifdef HARDDEBUG + nn->id = ++(act->idc); +#endif + nn->d = i+1; + nn->h = act->root; + nn->final = 0; + memset(nn->g, 0, sizeof(ACNode *) * ALPHABET_SIZE); + acn->g[x[i]] = nn; + ++i; + acn = nn; + } + } + if (acn->final == 0) { /* New pattern, add to final set */ + acn->final = n; + } +} + +/* + * Called when all search patterns are added. + */ +static void ac_compute_failure_functions(ACTrie *act, ACNode **qbuff) +{ + ACNode *root = act->root; + ACNode *parent; + int i; + int qh = 0,qt = 0; + ACNode *child, *r; + + /* Set all children of the root to have the root as failure function */ + for (i = 0; i < ALPHABET_SIZE; ++i) { + if (root->g[i] != NULL) { + root->g[i]->h = root; + /* Add to que for later traversal */ + qbuff[qt++] = root->g[i]; + } + } + + /* So, now we've handled children of the root state, traverse the + rest of the trie BF... */ + while (qh < qt) { + parent = qbuff[qh++]; + for (i = 0; i < ALPHABET_SIZE; ++ i) { + if ((child = parent->g[i]) != NULL) { + /* Visit this node to */ + qbuff[qt++] = child; + /* Search for correct failure function, follow the parents failure + function until you find a similar transition funtion to this + childs */ + r = parent->h; + while (r != NULL && r->g[i] == NULL) { + r = r->h; + } + if (r == NULL) { + /* Replace NULL failures with the root as we go */ + child->h = (root->g[i] == NULL) ? root : root->g[i]; + } else { + child->h = r->g[i]; + /* + * The "final" set is scattered among the nodes. When + * the failure function points to a member of the final set, + * we have a match, but we might not see it in the current node + * if we dont mark it as a special type of final, i.e. foolow + * the failure function and you will find a real member of final + * set. This is marked with a negative string id and only done if + * this node does not represent a member in the final set. + */ + if (!(child->final) && (child->h->final)) { + child->final = -1; + } + } + } + } + } + /* Finally the failure function of the root should point to itself */ + root->h = root; +} + +/* + * The actual searching for needles in the haystack... + * Find first match using Aho-Coracick Trie + * return pattern number and fill in mpos + mlen if found, otherwise return 0 + * Return the matching pattern that *starts* first, not ends + * first (difference when overlapping), hence the candidate thing. + * Basic AC finds the first end before the first start... + * + */ +static Uint ac_find_first_match(ACTrie *act, byte *haystack, Uint len, + Uint *mpos, Uint *mlen) +{ + ACNode *q = act->root; + Uint i = 0; + ACNode *candidate = NULL, *r; + Uint candidate_start = 0 /* Init not needed, just quiet the compiler */; + Uint rstart; + + while (i < len) { + while (q->g[haystack[i]] == NULL && q->h != q) { + q = q->h; + } + if (q->g[haystack[i]] != NULL) { + q = q->g[haystack[i]]; + } +#ifdef HARDDEBUG + erts_printf("ch = %c, Current: %u\n", (int) haystack[i], (unsigned) q->id); +#endif + ++i; + if (candidate != NULL && (i - q->d) > candidate_start) { + break; + } + if (q->final) { + r = q; + while (r->final < 0) + r = r->h; + rstart = i - r->d; + if (candidate == NULL || rstart < candidate_start) { + candidate_start = rstart; + candidate = r; + } + } + } + if (!candidate) { + return 0; + } +#ifdef HARDDEBUG + dump_ac_node(candidate,0,'?'); +#endif + *mpos = candidate_start; + *mlen = candidate->d; + return candidate->final; +} + +typedef struct _findall_data { + Uint pos; + Uint len; +#ifdef HARDDEBUG + Uint id; +#endif + Eterm epos; + Eterm elen; +#if 0 + Eterm eid; +#endif +} FindallData; +/* + * Returns number of non overlapping matches + */ +static Uint ac_find_all_non_overlapping(ACTrie *act, byte *haystack, Uint len, + FindallData **data) +{ + ACNode *q = act->root; + Uint i = 0; + Uint rstart; + ACNode *r; + Uint m = 0, save_m; + Uint allocated = 0; + FindallData *out = NULL; + + + while (i < len) { + while (q->g[haystack[i]] == NULL && q->h != q) { + q = q->h; + } + if (q->g[haystack[i]] != NULL) { + q = q->g[haystack[i]]; + } +#ifdef HARDDEBUG + erts_printf("ch = %c, Current: %u\n", (int) haystack[i], (unsigned) q->id); +#endif + ++i; + if (q->final) { + r = q; + while (r->final) { + while (r->final < 0) + r = r->h; +#ifdef HARDDEBUG + erts_printf("Trying to add %u\n",(unsigned) r->final); +#endif + rstart = i - r->d; + save_m = m; + while (m > 0 && out[m-1].pos > rstart) { +#ifdef HARDDEBUG + erts_printf("Popping %u\n",(unsigned) out[m-1].id); +#endif + --m; + } + if (m == 0 || out[m-1].pos + out[m-1].len <= rstart) { + if (m >= allocated) { + if (!allocated) { + allocated = 10; + out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * allocated); + } else { + allocated *= 2; + out = erts_realloc(ERTS_ALC_T_TMP, out, + sizeof(FindallData) * allocated); + } + } + out[m].pos = rstart; + out[m].len = r->d; +#ifdef HARDDEBUG + out[m].id = r->final; +#endif + ++m; +#ifdef HARDDEBUG + erts_printf("Pushing %u\n",(unsigned) out[m-1].id); +#endif + } else { +#ifdef HARDDEBUG + erts_printf("Backtracking %d steps\n",save_m - m); +#endif + m = save_m; + } + r = r->h; + } + } + } + *data = out; + return m; +} + +/* + * Boyer More - most obviously implemented more or less exactly as Christian Charras + * and Thierry Lecroq describes it in "Handbook of Exact String-Matching Algorithms" + * http://www-igm.univ-mlv.fr/~lecroq/string/ + */ + +/* + * Call this to compute badshifts array + */ +static void compute_badshifts(BMData *bmd) +{ + Sint i; + Sint m = bmd->len; + + for (i = 0; i < ALPHABET_SIZE; ++i) { + bmd->badshift[i] = m; + } + for (i = 0; i < m - 1; ++i) { + bmd->badshift[bmd->x[i]] = m - i - 1; + } +} + +/* Helper for "compute_goodshifts" */ +static void compute_suffixes(byte *x, Sint m, Sint *suffixes) +{ + int f,g,i; + + suffixes[m - 1] = m; + + f = 0; /* To avoid use before set warning */ + + g = m - 1; + + for (i = m - 2; i >= 0; --i) { + if (i > g && suffixes[i + m - f] < i - g) { + suffixes[i] = suffixes[i + m - 1 - f]; + } else { + if (i < g) { + g = i; + } + f = i; + while ( g >= 0 && x[g] == x[g + m - 1 - f] ) { + --g; + } + suffixes[i] = f - g; + } + } +} + +/* + * Call this to compute goodshift array + */ +static void compute_goodshifts(BMData *bmd) +{ + Sint m = bmd->len; + byte *x = bmd->x; + Sint i, j; + Sint *suffixes = erts_alloc(ERTS_ALC_T_TMP, m * sizeof(Uint)); + + compute_suffixes(x, m, suffixes); + + for (i = 0; i < m; ++i) { + bmd->goodshift[i] = m; + } + + j = 0; + + for (i = m - 1; i >= -1; --i) { + if (i == -1 || suffixes[i] == i + 1) { + while (j < m - 1 - i) { + if (bmd->goodshift[j] == m) { + bmd->goodshift[j] = m - 1 - i; + } + ++j; + } + } + } + for (i = 0; i <= m - 2; ++i) { + bmd->goodshift[m - 1 - suffixes[i]] = m - 1 - i; + } + erts_free(ERTS_ALC_T_TMP, suffixes); +} + +static Sint bm_find_first_match(BMData *bmd, byte *haystack, Uint len) +{ + Sint blen = bmd->len; + Sint *gs = bmd->goodshift; + Sint *bs = bmd->badshift; + byte *needle = bmd->x; + Sint i; + Sint j = 0; + + while (j <= len - blen) { + for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) + ; + if (i < 0) { /* found */ + return j; + } + j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); + } + return -1; +} + +/* + * Interface functions (i.e. "bif's") + */ + +/* + * Search functionality interfaces + */ +BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) +{ + Eterm t, b, comp_term = NIL; + Uint characters; + Uint words; + int return_tuple = 0; + + characters = 0; + words = 0; + + if (is_list(BIF_ARG_1)) { + return_tuple = 1; + t = BIF_ARG_1; + while (is_list(t)) { + b = CAR(list_val(t)); + t = CDR(list_val(t)); + if (!is_binary(b)) { + goto badarg; + } + if (binary_bitsize(b) != 0) { + goto badarg; + } + ++words; + characters += binary_size(b); + } + if (is_not_nil(t)) { + goto badarg; + } + if (words > 1) { + comp_term = BIF_ARG_1; + } else { + comp_term = CAR(list_val(BIF_ARG_1)); + } + } else if (is_binary(BIF_ARG_1)) { + if (binary_bitsize(BIF_ARG_1) != 0) { + goto badarg; + } + words = 1; + comp_term = BIF_ARG_1; + characters = binary_size(BIF_ARG_1); + } + + if (characters == 0) { + goto badarg; + } + ASSERT(words > 0); + + if (words == 1) { + Eterm ret; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + MyAllocator my; + BMData *bmd; + Eterm *hp; + + ERTS_GET_BINARY_BYTES(comp_term, bytes, bitoffs, bitsize); + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(comp_term, &temp_alloc); + } + bmd = create_bmdata(&my, BIF_P, bytes, characters, &ret); + bmd->ret_tuple = return_tuple; + compute_badshifts(bmd); + compute_goodshifts(bmd); + erts_free_aligned_binary_bytes(temp_alloc); + CHECK_ALLOCATOR(my); + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, am_bm, ret); + BIF_RET(ret); + } else { + Eterm ret; + ACTrie *act; + MyAllocator my; + Eterm *hp; + ACNode **qbuff; + + act = create_acdata(&my, BIF_P, characters, &qbuff, &ret); + t = comp_term; + while (is_list(t)) { + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + b = CAR(list_val(t)); + t = CDR(list_val(t)); + ERTS_GET_BINARY_BYTES(b, bytes, bitoffs, bitsize); + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(b, &temp_alloc); + } + ac_add_one_pattern(&my,act,bytes,binary_size(b)); + erts_free_aligned_binary_bytes(temp_alloc); + } + ac_compute_failure_functions(act,qbuff); + CHECK_ALLOCATOR(my); + erts_free(ERTS_ALC_T_TMP,qbuff); + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, am_ac, ret); + BIF_RET(ret); + } + badarg: + BIF_ERROR(BIF_P,BADARG); +} + +BIF_RETTYPE binary_match_3(BIF_ALIST_3) +{ + Uint hsstart, hslen; + Eterm *tp; + if (is_not_binary(BIF_ARG_1)) { + goto badarg; + } + if (BIF_ARG_3 == ((Eterm) 0)) { + /* Invalid term, we're called from binary_match_2... */ + hsstart = 0; + hslen = binary_size(BIF_ARG_1); + } else if (is_tuple(BIF_ARG_3)) { + tp = tuple_val(BIF_ARG_3); + if (arityval(*tp) != 2) { + goto badarg; + } + if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { + goto badarg; + } + if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { + goto badarg; + } + if (hslen < hsstart) { + goto badarg; + } + if (hslen > binary_size(BIF_ARG_1)-1) { + goto badarg; /* XXX:PaN or should we take as much as we have ? */ + } + hslen = hslen + 1 - hsstart; + } else { + goto badarg; + } + if (hslen == 0) { + BIF_RET(am_nomatch); + } + if (is_tuple(BIF_ARG_2)) { + tp = tuple_val(BIF_ARG_2); + if (arityval(*tp) != 2 || is_not_atom(tp[1])) { + goto badarg; + } + if (tp[1] == am_bm && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + Binary *mbp; + BMData *bm; + Sint pos; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; + mbp = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { + goto badarg; + } + bm = (BMData *) ERTS_MAGIC_BIN_DATA(mbp); +#ifdef HARDDEBUG + dump_bm_data(bm); +#endif + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + pos = bm_find_first_match(bm, bytes + hsstart, hslen); + if (pos < 0) { + ret = am_nomatch; + } else { + Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); + ret = erts_make_integer(pos+hsstart,BIF_P); + if (bm->ret_tuple) { + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, ret, erlen); + } + } + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(ret); + } else if (tp[1] == am_ac && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + Binary *mbp; + ACTrie *act; + Uint pos, msn,rlen; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; + + mbp = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { + goto badarg; + } + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(mbp); +#ifdef HARDDEBUG + dump_ac_trie(act); +#endif + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + msn = ac_find_first_match(act, bytes + hsstart, + hslen, &pos, &rlen); + if (msn == 0) { + ret = am_nomatch; + } else { + Eterm epos = erts_make_integer(pos+hsstart,BIF_P); + Eterm erlen = erts_make_integer(rlen,BIF_P); + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, epos, erlen); + } + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(ret); + } else { + goto badarg; + } + } else { + goto badarg; /* Compilation on the fly NYI */ + } + badarg: + BIF_ERROR(BIF_P,BADARG); +} +BIF_RETTYPE binary_match_2(BIF_ALIST_2) +{ + return binary_match_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); +} + +BIF_RETTYPE binary_matches_3(BIF_ALIST_3) +{ + Uint hsstart, hslen; + Eterm *tp; + if (is_not_binary(BIF_ARG_1)) { + goto badarg; + } + if (BIF_ARG_3 == ((Eterm) 0)) { + /* Invalid term, we're called from binary_match_2... */ + hsstart = 0; + hslen = binary_size(BIF_ARG_1); + } else if (is_tuple(BIF_ARG_3)) { + tp = tuple_val(BIF_ARG_3); + if (arityval(*tp) != 2) { + goto badarg; + } + if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { + goto badarg; + } + if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { + goto badarg; + } + if (hslen < hsstart) { + goto badarg; + } + if (hslen > binary_size(BIF_ARG_1)-1) { + goto badarg; /* XXX:PaN or should we take as much as we have ? */ + } + hslen = hslen + 1 - hsstart; + } else { + goto badarg; + } + if (hslen == 0) { + BIF_RET(am_nomatch); + } + if (is_tuple(BIF_ARG_2)) { + tp = tuple_val(BIF_ARG_2); + if (arityval(*tp) != 2 || is_not_atom(tp[1])) { + goto badarg; + } + if (tp[1] == am_bm && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + Binary *mbp; + BMData *bm; + Sint pos; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; + mbp = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { + goto badarg; + } + bm = (BMData *) ERTS_MAGIC_BIN_DATA(mbp); +#ifdef HARDDEBUG + dump_bm_data(bm); +#endif + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + pos = bm_find_first_match(bm, bytes + hsstart, hslen); + if (pos < 0) { + ret = am_nomatch; + } else { + Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); + ret = erts_make_integer(pos,BIF_P); + if (bm->ret_tuple) { + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, ret, erlen); + } + } + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(ret); + } else if (tp[1] == am_ac && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + Binary *mbp; + ACTrie *act; + Uint rlen; + Sint i; + FindallData *fad; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret,tpl; + Eterm *hp; + + mbp = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { + goto badarg; + } + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(mbp); +#ifdef HARDDEBUG + dump_ac_trie(act); +#endif + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + rlen = ac_find_all_non_overlapping(act, bytes + hsstart, + hslen, &fad); + if (rlen == 0) { + ret = am_nomatch; + } else { + for (i = 0; i < rlen; ++i) { + fad[i].epos = erts_make_integer(fad[i].pos,BIF_P); + fad[i].elen = erts_make_integer(fad[i].len,BIF_P); + } + hp = HAlloc(BIF_P,rlen * (3 + 2)); + ret = NIL; + for (i = rlen - 1; i >= 0; --i) { + tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); + hp +=3; + ret = CONS(hp,tpl,ret); + hp += 2; + } + } + erts_free_aligned_binary_bytes(temp_alloc); + if (fad != NULL) { + erts_free(ERTS_ALC_T_TMP,fad); + } + BIF_RET(ret); + } else { + goto badarg; + } + } else { + goto badarg; /* Compilation on the fly NYI */ + } + badarg: + BIF_ERROR(BIF_P,BADARG); +} +BIF_RETTYPE binary_matches_2(BIF_ALIST_2) +{ + return binary_matches_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); +} + +/* + * Hard debug functions (dump) for the search structures + */ + +#ifdef HARDDEBUG +static void dump_bm_data(BMData *bm) +{ + int i,j; + erts_printf("Dumping Boyer-More structure.\n"); + erts_printf("=============================\n"); + erts_printf("Return tuple: %d\n",bm->ret_tuple); + erts_printf("Searchstring [%ld]:\n", bm->len); + erts_printf("<<"); + for (i = 0; i < bm->len; ++i) { + if (i > 0) { + erts_printf(", "); + } + erts_printf("%d", (int) bm->x[i]); + if (bm->x[i] >= 'A') { + erts_printf(" ($%c)",(char) bm->x[i]); + } + } + erts_printf(">>\n"); + erts_printf("GoodShift array:\n"); + for (i = 0; i < bm->len; ++i) { + erts_printf("GoodShift[%d]: %ld\n", i, bm->goodshift[i]); + } + erts_printf("BadShift array:\n"); + j = 0; + for (i = 0; i < ALPHABET_SIZE; i += j) { + for (j = 0; i + j < ALPHABET_SIZE && j < 6; ++j) { + erts_printf("BS[%03d]:%02ld, ", i+j, bm->badshift[i+j]); + } + erts_printf("\n"); + } +} + +static void dump_ac_node(ACNode *node, int indent, int ch) { + int i; + char *spaces = erts_alloc(ERTS_ALC_T_TMP, 10 * indent + 1); + memset(spaces,' ',10*indent); + spaces[10*indent] = '\0'; + erts_printf("%s-> %c\n",spaces,ch); + erts_printf("%sId: %u\n",spaces,(unsigned) node->id); + erts_printf("%sD: %u\n",spaces,(unsigned)node->d); + erts_printf("%sFinal: %d\n",spaces,(int)node->final); + erts_printf("%sFail: %u\n",spaces,(unsigned)node->h->id); + erts_free(ERTS_ALC_T_TMP,spaces); + for(i=0;ig[i] != NULL && node->g[i] != node) { + dump_ac_node(node->g[i],indent+1,i); + } + } +} + + +static void dump_ac_trie(ACTrie *act) +{ + erts_printf("Aho Corasick Trie dump.\n"); + erts_printf("=======================\n"); + erts_printf("Node counter: %u\n", (unsigned) act->idc); + erts_printf("Searchstring counter: %u\n", (unsigned) act->counter); + erts_printf("Trie:\n"); + dump_ac_node(act->root, 0, '0'); + return; +} +#endif -- cgit v1.2.3 From e0c4c2867c20368c5b5d88cbbf92da7b7a3f386e Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 2 Nov 2009 15:55:20 +0100 Subject: Teach binary.c the semantics to take longest instead of shortest match Add testcase embryos and reference implementation. Change name of compile function according to EEP31. --- erts/emulator/beam/binary.c | 411 ++++++++++++++++++++++++-------------------- 1 file changed, 224 insertions(+), 187 deletions(-) (limited to 'erts/emulator/beam/binary.c') diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 29c1af2114..4662e60d51 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -802,13 +802,12 @@ static void cleanup_my_data(Binary *bp) * Initiate a (allocated) micro allocator and fill in the base * for an Aho-Corasick search trie, given the accumulated length of the search strings. */ -static ACTrie *create_acdata(MyAllocator *my, Process *p, Uint len, - ACNode ***qbuff /* out */,Eterm *the_bin /* out */) +static ACTrie *create_acdata(MyAllocator *my, Uint len, + ACNode ***qbuff /* out */,Binary **the_bin /* out */) { Uint datasize = AC_SIZE(len); ACTrie *act; ACNode *acn; - Eterm *hp; Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); byte *data = ERTS_MAGIC_BIN_DATA(mb); @@ -826,19 +825,17 @@ static ACTrie *create_acdata(MyAllocator *my, Process *p, Uint len, acn->id = 0; #endif *qbuff = erts_alloc(ERTS_ALC_T_TMP, sizeof(ACNode *) * len); - hp = HAlloc(p, PROC_BIN_SIZE); - *the_bin = erts_mk_magic_binary_term(&hp, &MSO(p), mb); + *the_bin = mb; return act; } /* * The same initialization of allocator and basic data for Boyer-More. */ -static BMData *create_bmdata(MyAllocator *my, Process *p, byte *x, Uint len, Eterm *the_bin /* out */) +static BMData *create_bmdata(MyAllocator *my, byte *x, Uint len, Binary **the_bin /* out */) { Uint datasize = BM_SIZE(len); BMData *bmd; - Eterm *hp; Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); byte *data = ERTS_MAGIC_BIN_DATA(mb); init_my_allocator(my, datasize, data); @@ -848,8 +845,7 @@ static BMData *create_bmdata(MyAllocator *my, Process *p, byte *x, Uint len, Ete bmd->len = len; bmd->goodshift = my_alloc(my,sizeof(Uint) * len); bmd->ret_tuple = 0; - hp = HAlloc(p, PROC_BIN_SIZE); - *the_bin = erts_mk_magic_binary_term(&hp, &MSO(p), mb); + *the_bin = mb; return bmd; } @@ -998,7 +994,8 @@ static Uint ac_find_first_match(ACTrie *act, byte *haystack, Uint len, while (r->final < 0) r = r->h; rstart = i - r->d; - if (candidate == NULL || rstart < candidate_start) { + if (candidate == NULL || rstart < candidate_start || + (rstart == candidate_start && candidate->d < q->d)) { candidate_start = rstart; candidate = r; } @@ -1063,12 +1060,21 @@ static Uint ac_find_all_non_overlapping(ACTrie *act, byte *haystack, Uint len, #endif rstart = i - r->d; save_m = m; - while (m > 0 && out[m-1].pos > rstart) { + while (m > 0 && (out[m-1].pos > rstart || + (out[m-1].pos == rstart && + out[m-1].len < r->d))) { #ifdef HARDDEBUG erts_printf("Popping %u\n",(unsigned) out[m-1].id); #endif --m; } +#ifdef HARDDEBUG + if (m > 0) { + erts_printf("Pos %u\n",out[m-1].pos); + erts_printf("Len %u\n",out[m-1].len); + } + erts_printf("Rstart %u\n",rstart); +#endif if (m == 0 || out[m-1].pos + out[m-1].len <= rstart) { if (m >= allocated) { if (!allocated) { @@ -1213,7 +1219,8 @@ static Sint bm_find_first_match(BMData *bmd, byte *haystack, Uint len) /* * Search functionality interfaces */ -BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) + +static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp) { Eterm t, b, comp_term = NIL; Uint characters; @@ -1223,9 +1230,9 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) characters = 0; words = 0; - if (is_list(BIF_ARG_1)) { + if (is_list(argument)) { return_tuple = 1; - t = BIF_ARG_1; + t = argument; while (is_list(t)) { b = CAR(list_val(t)); t = CDR(list_val(t)); @@ -1242,17 +1249,17 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) goto badarg; } if (words > 1) { - comp_term = BIF_ARG_1; + comp_term = argument; } else { - comp_term = CAR(list_val(BIF_ARG_1)); + comp_term = CAR(list_val(argument)); } - } else if (is_binary(BIF_ARG_1)) { - if (binary_bitsize(BIF_ARG_1) != 0) { + } else if (is_binary(argument)) { + if (binary_bitsize(argument) != 0) { goto badarg; } words = 1; - comp_term = BIF_ARG_1; - characters = binary_size(BIF_ARG_1); + comp_term = argument; + characters = binary_size(argument); } if (characters == 0) { @@ -1261,35 +1268,33 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) ASSERT(words > 0); if (words == 1) { - Eterm ret; byte *bytes; Uint bitoffs, bitsize; byte *temp_alloc = NULL; MyAllocator my; BMData *bmd; - Eterm *hp; + Binary *bin; ERTS_GET_BINARY_BYTES(comp_term, bytes, bitoffs, bitsize); if (bitoffs != 0) { bytes = erts_get_aligned_binary_bytes(comp_term, &temp_alloc); } - bmd = create_bmdata(&my, BIF_P, bytes, characters, &ret); + bmd = create_bmdata(&my, bytes, characters, &bin); bmd->ret_tuple = return_tuple; compute_badshifts(bmd); compute_goodshifts(bmd); erts_free_aligned_binary_bytes(temp_alloc); CHECK_ALLOCATOR(my); - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, am_bm, ret); - BIF_RET(ret); + *tag = am_bm; + *binp = bin; + return 0; } else { - Eterm ret; ACTrie *act; MyAllocator my; - Eterm *hp; ACNode **qbuff; + Binary *bin; - act = create_acdata(&my, BIF_P, characters, &qbuff, &ret); + act = create_acdata(&my, characters, &qbuff, &bin); t = comp_term; while (is_list(t)) { byte *bytes; @@ -1307,18 +1312,37 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) ac_compute_failure_functions(act,qbuff); CHECK_ALLOCATOR(my); erts_free(ERTS_ALC_T_TMP,qbuff); - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, am_ac, ret); - BIF_RET(ret); + *tag = am_ac; + *binp = bin; + return 0; } badarg: - BIF_ERROR(BIF_P,BADARG); + return -1; +} + +BIF_RETTYPE binary_compile_pattern_1(BIF_ALIST_1) +{ + Binary *bin; + Eterm tag, ret; + Eterm *hp; + + if (do_binary_match_compile(BIF_ARG_1,&tag,&bin)) { + BIF_ERROR(BIF_P,BADARG); + } + hp = HAlloc(BIF_P, PROC_BIN_SIZE+3); + ret = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); + ret = TUPLE2(hp, tag, ret); + BIF_RET(ret); } + BIF_RETTYPE binary_match_3(BIF_ALIST_3) { Uint hsstart, hslen; Eterm *tp; + Eterm type; + Binary *bin; + Eterm bin_term = NIL; if (is_not_binary(BIF_ARG_1)) { goto badarg; } @@ -1355,85 +1379,90 @@ BIF_RETTYPE binary_match_3(BIF_ALIST_3) if (arityval(*tp) != 2 || is_not_atom(tp[1])) { goto badarg; } - if (tp[1] == am_bm && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - Binary *mbp; - BMData *bm; - Sint pos; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret; - Eterm *hp; - mbp = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { - goto badarg; - } - bm = (BMData *) ERTS_MAGIC_BIN_DATA(mbp); + if (((tp[1] != am_bm) && (tp[1] != am_ac)) || + !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + goto badarg; + } + type = tp[1]; + bin = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { + goto badarg; + } + bin_term = tp[2]; + } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { + goto badarg; + } + + if (type == am_bm) { + BMData *bm; + Sint pos; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG - dump_bm_data(bm); + dump_bm_data(bm); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - pos = bm_find_first_match(bm, bytes + hsstart, hslen); - if (pos < 0) { - ret = am_nomatch; - } else { - Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); - ret = erts_make_integer(pos+hsstart,BIF_P); - if (bm->ret_tuple) { - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, ret, erlen); - } + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + pos = bm_find_first_match(bm, bytes + hsstart, hslen); + if (pos < 0) { + ret = am_nomatch; + } else { + Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); + ret = erts_make_integer(pos+hsstart,BIF_P); + if (bm->ret_tuple) { + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, ret, erlen); } - erts_free_aligned_binary_bytes(temp_alloc); - BIF_RET(ret); - } else if (tp[1] == am_ac && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - Binary *mbp; - ACTrie *act; - Uint pos, msn,rlen; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret; - Eterm *hp; + } + erts_free_aligned_binary_bytes(temp_alloc); + if (bin_term == NIL) { + erts_bin_free(bin); + } + BIF_RET(ret); + } else if (type == am_ac) { + ACTrie *act; + Uint pos, msn,rlen; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; - mbp = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { - goto badarg; - } - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(mbp); + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG - dump_ac_trie(act); + dump_ac_trie(act); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - msn = ac_find_first_match(act, bytes + hsstart, - hslen, &pos, &rlen); - if (msn == 0) { - ret = am_nomatch; - } else { - Eterm epos = erts_make_integer(pos+hsstart,BIF_P); - Eterm erlen = erts_make_integer(rlen,BIF_P); - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, epos, erlen); - } - erts_free_aligned_binary_bytes(temp_alloc); - BIF_RET(ret); - } else { + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { goto badarg; } - } else { - goto badarg; /* Compilation on the fly NYI */ + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + msn = ac_find_first_match(act, bytes + hsstart, + hslen, &pos, &rlen); + if (msn == 0) { + ret = am_nomatch; + } else { + Eterm epos = erts_make_integer(pos+hsstart,BIF_P); + Eterm erlen = erts_make_integer(rlen,BIF_P); + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, epos, erlen); + } + erts_free_aligned_binary_bytes(temp_alloc); + if (bin_term == NIL) { + erts_bin_free(bin); + } + BIF_RET(ret); } badarg: BIF_ERROR(BIF_P,BADARG); @@ -1447,6 +1476,9 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) { Uint hsstart, hslen; Eterm *tp; + Eterm type; + Binary *bin; + Eterm bin_term = NIL; if (is_not_binary(BIF_ARG_1)) { goto badarg; } @@ -1483,98 +1515,103 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) if (arityval(*tp) != 2 || is_not_atom(tp[1])) { goto badarg; } - if (tp[1] == am_bm && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - Binary *mbp; - BMData *bm; - Sint pos; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret; - Eterm *hp; - mbp = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { - goto badarg; - } - bm = (BMData *) ERTS_MAGIC_BIN_DATA(mbp); + if (((tp[1] != am_bm) && (tp[1] != am_ac)) || + !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + goto badarg; + } + type = tp[1]; + bin = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { + goto badarg; + } + bin_term = tp[2]; + } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { + goto badarg; + } + + if (type == am_bm) { + BMData *bm; + Sint pos; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG - dump_bm_data(bm); + dump_bm_data(bm); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - pos = bm_find_first_match(bm, bytes + hsstart, hslen); - if (pos < 0) { - ret = am_nomatch; - } else { - Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); - ret = erts_make_integer(pos,BIF_P); - if (bm->ret_tuple) { - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, ret, erlen); - } + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + pos = bm_find_first_match(bm, bytes + hsstart, hslen); + if (pos < 0) { + ret = am_nomatch; + } else { + Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); + ret = erts_make_integer(pos,BIF_P); + if (bm->ret_tuple) { + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, ret, erlen); } - erts_free_aligned_binary_bytes(temp_alloc); - BIF_RET(ret); - } else if (tp[1] == am_ac && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - Binary *mbp; - ACTrie *act; - Uint rlen; - Sint i; - FindallData *fad; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret,tpl; - Eterm *hp; + } + erts_free_aligned_binary_bytes(temp_alloc); + if (bin_term == NIL) { + erts_bin_free(bin); + } + BIF_RET(ret); + } else if (type == am_ac) { + ACTrie *act; + Uint rlen; + Sint i; + FindallData *fad; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret,tpl; + Eterm *hp; - mbp = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { - goto badarg; - } - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(mbp); + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG - dump_ac_trie(act); + dump_ac_trie(act); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - rlen = ac_find_all_non_overlapping(act, bytes + hsstart, - hslen, &fad); - if (rlen == 0) { - ret = am_nomatch; - } else { - for (i = 0; i < rlen; ++i) { - fad[i].epos = erts_make_integer(fad[i].pos,BIF_P); - fad[i].elen = erts_make_integer(fad[i].len,BIF_P); - } - hp = HAlloc(BIF_P,rlen * (3 + 2)); - ret = NIL; - for (i = rlen - 1; i >= 0; --i) { - tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); - hp +=3; - ret = CONS(hp,tpl,ret); - hp += 2; - } + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + rlen = ac_find_all_non_overlapping(act, bytes + hsstart, + hslen, &fad); + if (rlen == 0) { + ret = am_nomatch; + } else { + for (i = 0; i < rlen; ++i) { + fad[i].epos = erts_make_integer(fad[i].pos,BIF_P); + fad[i].elen = erts_make_integer(fad[i].len,BIF_P); } - erts_free_aligned_binary_bytes(temp_alloc); - if (fad != NULL) { - erts_free(ERTS_ALC_T_TMP,fad); + hp = HAlloc(BIF_P,rlen * (3 + 2)); + ret = NIL; + for (i = rlen - 1; i >= 0; --i) { + tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); + hp +=3; + ret = CONS(hp,tpl,ret); + hp += 2; } - BIF_RET(ret); - } else { - goto badarg; } - } else { - goto badarg; /* Compilation on the fly NYI */ + erts_free_aligned_binary_bytes(temp_alloc); + if (fad != NULL) { + erts_free(ERTS_ALC_T_TMP,fad); + } + if (bin_term == NIL) { + erts_bin_free(bin); + } + BIF_RET(ret); } badarg: BIF_ERROR(BIF_P,BADARG); -- cgit v1.2.3 From ba8c9c7c1594b4870936814caf3520a0f4e312f7 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 16 Apr 2010 18:11:49 +0200 Subject: Teach BIF's binary:match/matches interrupting/restarting Add Boyer More implementation of binary:matches. Cleanup and removed unused code. --- erts/emulator/beam/binary.c | 861 +++++++++++++++++++++++++++++++++----------- 1 file changed, 642 insertions(+), 219 deletions(-) (limited to 'erts/emulator/beam/binary.c') diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 4662e60d51..2b110e8b82 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -676,21 +676,45 @@ bitstr_list_len(Eterm obj) return (Sint) -1; } -#define HARDDEBUG - /* * The native implementation functions for the module binary. * Searching is implemented using aither Boyer-More or Aho-Corasick * depending on number of searchstrings (BM if one, AC if more than one). - * Native implementation is for efficiency, nothing really *needs* to be - * implemented in native code. + * Native implementation is mostly for efficiency, nothing (except binary:referenced_byte_size) + * really *needs* to be implemented in native code. */ +/* #define HARDDEBUG */ + /* * A micro allocator used when building search structures, just a convenience * for building structures inside a pre alocated magic binary using conventional * malloc-like interface. */ +static Export binary_match_trap_export; +static BIF_RETTYPE binary_match_trap(BIF_ALIST_3); +static Export binary_matches_trap_export; +static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3); +void erts_init_bif_binary(void) +{ + sys_memset((void *) &binary_match_trap_export, 0, sizeof(Export)); + binary_match_trap_export.address = &binary_match_trap_export.code[3]; + binary_match_trap_export.code[0] = am_erlang; + binary_match_trap_export.code[1] = am_binary_match_trap; + binary_match_trap_export.code[2] = 3; + binary_match_trap_export.code[3] = (BeamInstr) em_apply_bif; + binary_match_trap_export.code[4] = (BeamInstr) &binary_match_trap; + + sys_memset((void *) &binary_matches_trap_export, 0, sizeof(Export)); + binary_matches_trap_export.address = &binary_matches_trap_export.code[3]; + binary_matches_trap_export.code[0] = am_erlang; + binary_matches_trap_export.code[1] = am_binary_matches_trap; + binary_matches_trap_export.code[2] = 3; + binary_matches_trap_export.code[3] = (BeamInstr) em_apply_bif; + binary_matches_trap_export.code[4] = (BeamInstr) &binary_matches_trap; + + return; +} #define MYALIGN(Size) (SIZEOF_VOID_P * (((Size) / SIZEOF_VOID_P) + \ !!(((Size) % SIZEOF_VOID_P)))) @@ -961,21 +985,55 @@ static void ac_compute_failure_functions(ACTrie *act, ACNode **qbuff) * The actual searching for needles in the haystack... * Find first match using Aho-Coracick Trie * return pattern number and fill in mpos + mlen if found, otherwise return 0 - * Return the matching pattern that *starts* first, not ends - * first (difference when overlapping), hence the candidate thing. + * Return the matching pattern that *starts* first, and ends + * last (difference when overlapping), hence the candidate thing. * Basic AC finds the first end before the first start... * */ -static Uint ac_find_first_match(ACTrie *act, byte *haystack, Uint len, - Uint *mpos, Uint *mlen) +typedef struct { + ACNode *q; + Uint pos; + Uint len; + ACNode *candidate; + Uint candidate_start; +} ACFindFirstState; + + +static void ac_init_find_first_match(ACFindFirstState *state, ACTrie *act, Sint startpos, Uint len) { - ACNode *q = act->root; - Uint i = 0; - ACNode *candidate = NULL, *r; - Uint candidate_start = 0 /* Init not needed, just quiet the compiler */; + state->q = act->root; + state->pos = startpos; + state->len = len; + state->candidate = NULL; + state->candidate_start = 0; +} +#define AC_OK 0 +#define AC_NOT_FOUND -1 +#define AC_RESTART -2 + +#define AC_LOOP_FACTOR 1 + +static int ac_find_first_match(ACFindFirstState *state, byte *haystack, + Uint *mpos, Uint *mlen, Uint reductions) +{ + ACNode *q = state->q; + Uint i = state->pos; + ACNode *candidate = state->candidate, *r; + Uint len = state->len; + Uint candidate_start = state->candidate_start; Uint rstart; + register Uint reds = (Uint) reductions; while (i < len) { + if (--reds == 0) { + state->q = q; + state->pos = i; + state->len = len; + state->candidate = candidate; + state->candidate_start = candidate_start; + return AC_RESTART; + } + while (q->g[haystack[i]] == NULL && q->h != q) { q = q->h; } @@ -1002,14 +1060,14 @@ static Uint ac_find_first_match(ACTrie *act, byte *haystack, Uint len, } } if (!candidate) { - return 0; + return AC_NOT_FOUND; } #ifdef HARDDEBUG dump_ac_node(candidate,0,'?'); #endif *mpos = candidate_start; *mlen = candidate->d; - return candidate->final; + return AC_OK; } typedef struct _findall_data { @@ -1020,35 +1078,86 @@ typedef struct _findall_data { #endif Eterm epos; Eterm elen; -#if 0 - Eterm eid; -#endif } FindallData; + +typedef struct { + ACNode *q; + Uint pos; + Uint len; + Uint m; + Uint allocated; + FindallData *out; +} ACFindAllState; + +static void ac_init_find_all(ACFindAllState *state, ACTrie *act, Sint startpos, Uint len) +{ + state->q = act->root; + state->pos = startpos; + state->len = len; + state->m = 0; + state->allocated = 0; + state->out = NULL; +} + +static void ac_restore_find_all(ACFindAllState *state, char *buff) +{ + memcpy(state,buff,sizeof(ACFindAllState)); + state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated)); + memcpy(state->out,buff+sizeof(ACFindAllState),sizeof(FindallData)*state->m); +} + +static void ac_serialize_find_all(ACFindAllState *state, char *buff) +{ + memcpy(buff,state,sizeof(ACFindAllState)); + memcpy(buff+sizeof(ACFindAllState),state->out,sizeof(FindallData)*state->m); +} + +static void ac_clean_find_all(ACFindAllState *state) +{ + if (state->out != NULL) { + erts_free(ERTS_ALC_T_TMP, state->out); + } +#ifdef HARDDEBUG + state->out = NULL; + state->allocated = 0; +#endif +} + +#define SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(S) (sizeof(ACFindAllState)+(sizeof(FindallData)*(S).m)) + /* - * Returns number of non overlapping matches + * Differs to the find_first function in that it stores all matches and the values + * arte returned only in the state. */ -static Uint ac_find_all_non_overlapping(ACTrie *act, byte *haystack, Uint len, - FindallData **data) +static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, Uint reductions) { - ACNode *q = act->root; - Uint i = 0; + ACNode *q = state->q; + Uint i = state->pos; Uint rstart; ACNode *r; - Uint m = 0, save_m; - Uint allocated = 0; - FindallData *out = NULL; + Uint len = state->len; + Uint m = state->m, save_m; + Uint allocated = state->allocated; + FindallData *out = state->out; + register Uint reds = (Uint) reductions; while (i < len) { + if (--reds == 0) { + state->q = q; + state->pos = i; + state->len = len; + state->m = m; + state->allocated = allocated; + state->out = out; + return AC_RESTART; + } while (q->g[haystack[i]] == NULL && q->h != q) { q = q->h; } if (q->g[haystack[i]] != NULL) { q = q->g[haystack[i]]; } -#ifdef HARDDEBUG - erts_printf("ch = %c, Current: %u\n", (int) haystack[i], (unsigned) q->id); -#endif ++i; if (q->final) { r = q; @@ -1105,8 +1214,9 @@ static Uint ac_find_all_non_overlapping(ACTrie *act, byte *haystack, Uint len, } } } - *data = out; - return m; + state->m = m; + state->out = out; + return (m == 0) ? AC_NOT_FOUND : AC_OK; } /* @@ -1192,16 +1302,39 @@ static void compute_goodshifts(BMData *bmd) erts_free(ERTS_ALC_T_TMP, suffixes); } -static Sint bm_find_first_match(BMData *bmd, byte *haystack, Uint len) +typedef struct { + Sint pos; + Uint len; +} BMFindFirstState; + +#define BM_OK 0 /* used only for find_all */ +#define BM_NOT_FOUND -1 +#define BM_RESTART -2 +#define BM_LOOP_FACTOR 1 + +static void bm_init_find_first_match(BMFindFirstState *state, Sint startpos, Uint len) +{ + state->pos = startpos; + state->len = len; +} + + +static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *haystack, Uint reductions) { Sint blen = bmd->len; + Uint len = state->len; Sint *gs = bmd->goodshift; Sint *bs = bmd->badshift; byte *needle = bmd->x; Sint i; - Sint j = 0; + Sint j = state->pos; + register Uint reds = reductions; while (j <= len - blen) { + if (--reds == 0) { + state->pos = j; + return BM_RESTART; + } for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) ; if (i < 0) { /* found */ @@ -1209,7 +1342,103 @@ static Sint bm_find_first_match(BMData *bmd, byte *haystack, Uint len) } j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); } - return -1; + return BM_NOT_FOUND; +} + +typedef struct { + Sint pos; + Uint len; + Uint m; + Uint allocated; + FindallData *out; +} BMFindAllState; + +static void bm_init_find_all(BMFindAllState *state, Sint startpos, Uint len) +{ + state->pos = startpos; + state->len = len; + state->m = 0; + state->allocated = 0; + state->out = NULL; +} + +static void bm_restore_find_all(BMFindAllState *state, char *buff) +{ + memcpy(state,buff,sizeof(BMFindAllState)); + state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated)); + memcpy(state->out,buff+sizeof(BMFindAllState),sizeof(FindallData)*state->m); +} + +static void bm_serialize_find_all(BMFindAllState *state, char *buff) +{ + memcpy(buff,state,sizeof(BMFindAllState)); + memcpy(buff+sizeof(BMFindAllState),state->out,sizeof(FindallData)*state->m); +} + +static void bm_clean_find_all(BMFindAllState *state) +{ + if (state->out != NULL) { + erts_free(ERTS_ALC_T_TMP, state->out); + } +#ifdef HARDDEBUG + state->out = NULL; + state->allocated = 0; +#endif +} + +#define SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(S) (sizeof(BMFindAllState)+(sizeof(FindallData)*(S).m)) + +/* + * Differs to the find_first function in that it stores all matches and the values + * arte returned only in the state. + */ +static Sint bm_find_all_non_overlapping(BMFindAllState *state, + BMData *bmd, byte *haystack, Uint reductions) +{ + Sint blen = bmd->len; + Uint len = state->len; + Sint *gs = bmd->goodshift; + Sint *bs = bmd->badshift; + byte *needle = bmd->x; + Sint i; + Sint j = state->pos; + Uint m = state->m; + Uint allocated = state->allocated; + FindallData *out = state->out; + register Uint reds = reductions; + + while (j <= len - blen) { + if (--reds == 0) { + state->pos = j; + state->m = m; + state->allocated = allocated; + state->out = out; + return BM_RESTART; + } + for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) + ; + if (i < 0) { /* found */ + if (m >= allocated) { + if (!allocated) { + allocated = 10; + out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * allocated); + } else { + allocated *= 2; + out = erts_realloc(ERTS_ALC_T_TMP, out, + sizeof(FindallData) * allocated); + } + } + out[m].pos = j; + out[m].len = blen; + ++m; + j += blen; + } else { + j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); + } + } + state->m = m; + state->out = out; + return (m == 0) ? BM_NOT_FOUND : BM_OK; } /* @@ -1335,150 +1564,310 @@ BIF_RETTYPE binary_compile_pattern_1(BIF_ALIST_1) BIF_RET(ret); } +#define DO_BIN_MATCH_OK 0 +#define DO_BIN_MATCH_BADARG -1 +#define DO_BIN_MATCH_RESTART -2 -BIF_RETTYPE binary_match_3(BIF_ALIST_3) +static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, + Eterm type, Binary *bin, Eterm state_term, Eterm *res_term) { - Uint hsstart, hslen; - Eterm *tp; - Eterm type; - Binary *bin; - Eterm bin_term = NIL; - if (is_not_binary(BIF_ARG_1)) { - goto badarg; - } - if (BIF_ARG_3 == ((Eterm) 0)) { - /* Invalid term, we're called from binary_match_2... */ - hsstart = 0; - hslen = binary_size(BIF_ARG_1); - } else if (is_tuple(BIF_ARG_3)) { - tp = tuple_val(BIF_ARG_3); - if (arityval(*tp) != 2) { - goto badarg; - } - if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { - goto badarg; - } - if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { - goto badarg; - } - if (hslen < hsstart) { - goto badarg; - } - if (hslen > binary_size(BIF_ARG_1)-1) { - goto badarg; /* XXX:PaN or should we take as much as we have ? */ - } - hslen = hslen + 1 - hsstart; - } else { + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + + ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); + if (bitsize != 0) { goto badarg; } - if (hslen == 0) { - BIF_RET(am_nomatch); + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); } - if (is_tuple(BIF_ARG_2)) { - tp = tuple_val(BIF_ARG_2); - if (arityval(*tp) != 2 || is_not_atom(tp[1])) { - goto badarg; - } - if (((tp[1] != am_bm) && (tp[1] != am_ac)) || - !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - goto badarg; - } - type = tp[1]; - bin = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { - goto badarg; - } - bin_term = tp[2]; - } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { - goto badarg; + if (state_term != NIL) { + Eterm *ptr = big_val(state_term); + type = ptr[1]; } if (type == am_bm) { BMData *bm; Sint pos; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; Eterm ret; Eterm *hp; + BMFindFirstState state; + Uint reds = ERTS_BIF_REDS_LEFT(p) * BM_LOOP_FACTOR; + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG dump_bm_data(bm); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + if (state_term == NIL) { + bm_init_find_first_match(&state, hsstart, hslen); + } else { + Eterm *ptr = big_val(state_term); + memcpy(&state,ptr+2,sizeof(state)); } - pos = bm_find_first_match(bm, bytes + hsstart, hslen); - if (pos < 0) { +#ifdef HARDDEBUG + erts_printf("(bm) state->pos = %ld, state->len = %lu\n",state.pos, state.len); +#endif + pos = bm_find_first_match(&state, bm, bytes, reds); + if (pos == BM_NOT_FOUND) { ret = am_nomatch; + } else if (pos == BM_RESTART) { + int x = (sizeof(BMFindFirstState) / sizeof(Eterm)) + + !!(sizeof(BMFindFirstState) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap bm!\n"); +#endif + hp = HAlloc(p,x+2); + hp[0] = make_pos_bignum_header(x+1); + hp[1] = type; + memcpy(hp+2,&state,sizeof(state)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + return DO_BIN_MATCH_RESTART; } else { - Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); - ret = erts_make_integer(pos+hsstart,BIF_P); + ret = erts_make_integer(pos,p); if (bm->ret_tuple) { - hp = HAlloc(BIF_P,3); + Eterm erlen = erts_make_integer((Uint) bm->len, p); + hp = HAlloc(p,3); ret = TUPLE2(hp, ret, erlen); } } erts_free_aligned_binary_bytes(temp_alloc); - if (bin_term == NIL) { - erts_bin_free(bin); - } - BIF_RET(ret); + *res_term = ret; + return DO_BIN_MATCH_OK; } else if (type == am_ac) { ACTrie *act; - Uint pos, msn,rlen; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; + Uint pos, rlen; + int acr; + ACFindFirstState state; Eterm ret; Eterm *hp; + Uint reds = ERTS_BIF_REDS_LEFT(p) * AC_LOOP_FACTOR; act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG dump_ac_trie(act); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + if (state_term == NIL) { + ac_init_find_first_match(&state, act, hsstart, hslen); + } else { + Eterm *ptr = big_val(state_term); + memcpy(&state,ptr+2,sizeof(state)); } - msn = ac_find_first_match(act, bytes + hsstart, - hslen, &pos, &rlen); - if (msn == 0) { + acr = ac_find_first_match(&state, bytes, &pos, &rlen, reds); + if (acr == AC_NOT_FOUND) { ret = am_nomatch; + } else if (acr == AC_RESTART) { + int x = (sizeof(state) / sizeof(Eterm)) + + !!(sizeof(BMFindFirstState) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap ac!\n"); +#endif + hp = HAlloc(p,x+2); + hp[0] = make_pos_bignum_header(x+1); + hp[1] = type; + memcpy(hp+2,&state,sizeof(state)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + return DO_BIN_MATCH_RESTART; } else { - Eterm epos = erts_make_integer(pos+hsstart,BIF_P); - Eterm erlen = erts_make_integer(rlen,BIF_P); - hp = HAlloc(BIF_P,3); + Eterm epos = erts_make_integer(pos+hsstart,p); + Eterm erlen = erts_make_integer(rlen,p); + hp = HAlloc(p,3); ret = TUPLE2(hp, epos, erlen); } erts_free_aligned_binary_bytes(temp_alloc); - if (bin_term == NIL) { - erts_bin_free(bin); + *res_term = ret; + return DO_BIN_MATCH_OK; + } + badarg: + return DO_BIN_MATCH_BADARG; +} + +static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen, + Eterm type, Binary *bin, Eterm state_term, Eterm *res_term) +{ + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + + ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); + } + if (state_term != NIL) { + Eterm *ptr = big_val(state_term); + type = ptr[1]; + } + + if (type == am_bm) { + BMData *bm; + Sint pos; + Eterm ret,tpl; + Eterm *hp; + BMFindAllState state; + Uint reds = ERTS_BIF_REDS_LEFT(p) * BM_LOOP_FACTOR; + + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_bm_data(bm); +#endif + if (state_term == NIL) { + bm_init_find_all(&state, hsstart, hslen); + } else { + Eterm *ptr = big_val(state_term); + bm_restore_find_all(&state,(char *) (ptr+2)); + } + + pos = bm_find_all_non_overlapping(&state, bm, bytes, reds); + if (pos == BM_NOT_FOUND) { + ret = am_nomatch; + } else if (pos == BM_RESTART) { + int x = (SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + + !!(SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap bm!\n"); +#endif + hp = HAlloc(p,x+2); + hp[0] = make_pos_bignum_header(x+1); + hp[1] = type; + bm_serialize_find_all(&state, (char *) (hp+2)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + bm_clean_find_all(&state); + return DO_BIN_MATCH_RESTART; + } else { + FindallData *fad = state.out; + int i; + for (i = 0; i < state.m; ++i) { + fad[i].epos = erts_make_integer(fad[i].pos,p); + fad[i].elen = erts_make_integer(fad[i].len,p); + } + hp = HAlloc(p,state.m * (3 + 2)); + ret = NIL; + for (i = state.m - 1; i >= 0; --i) { + tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); + hp +=3; + ret = CONS(hp,tpl,ret); + hp += 2; + } + } + erts_free_aligned_binary_bytes(temp_alloc); + bm_clean_find_all(&state); + *res_term = ret; + return DO_BIN_MATCH_OK; + } else if (type == am_ac) { + ACTrie *act; + int acr; + ACFindAllState state; + Eterm ret,tpl; + Eterm *hp; + Uint reds = ERTS_BIF_REDS_LEFT(p) * AC_LOOP_FACTOR; + + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_ac_trie(act); +#endif + if (state_term == NIL) { + ac_init_find_all(&state, act, hsstart, hslen); + } else { + Eterm *ptr = big_val(state_term); + ac_restore_find_all(&state,(char *) (ptr+2)); } - BIF_RET(ret); + acr = ac_find_all_non_overlapping(&state, bytes, reds); + if (acr == AC_NOT_FOUND) { + ret = am_nomatch; + } else if (acr == AC_RESTART) { + int x = (SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + + !!(SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap ac!\n"); +#endif + hp = HAlloc(p,x+2); + hp[0] = make_pos_bignum_header(x+1); + hp[1] = type; + ac_serialize_find_all(&state, (char *) (hp+2)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + ac_clean_find_all(&state); + return DO_BIN_MATCH_RESTART; + } else { + FindallData *fad = state.out; + int i; + for (i = 0; i < state.m; ++i) { + fad[i].epos = erts_make_integer(fad[i].pos,p); + fad[i].elen = erts_make_integer(fad[i].len,p); + } + hp = HAlloc(p,state.m * (3 + 2)); + ret = NIL; + for (i = state.m - 1; i >= 0; --i) { + tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); + hp +=3; + ret = CONS(hp,tpl,ret); + hp += 2; + } + } + erts_free_aligned_binary_bytes(temp_alloc); + ac_clean_find_all(&state); + *res_term = ret; + return DO_BIN_MATCH_OK; + } + badarg: + return DO_BIN_MATCH_BADARG; +} + +static BIF_RETTYPE binary_match_trap(BIF_ALIST_3) +{ + int runres; + Eterm result; + Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; + runres = do_binary_match(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result); + switch (runres) { + case DO_BIN_MATCH_OK: + BIF_RET(result); + case DO_BIN_MATCH_RESTART: + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3); + default: + goto badarg; } badarg: BIF_ERROR(BIF_P,BADARG); } -BIF_RETTYPE binary_match_2(BIF_ALIST_2) + +static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3) { - return binary_match_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); + int runres; + Eterm result; + Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; + runres = do_binary_matches(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result); + switch (runres) { + case DO_BIN_MATCH_OK: + BIF_RET(result); + case DO_BIN_MATCH_RESTART: + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3); + default: + goto badarg; + } + badarg: + BIF_ERROR(BIF_P,BADARG); } -BIF_RETTYPE binary_matches_3(BIF_ALIST_3) + +BIF_RETTYPE binary_match_3(BIF_ALIST_3) { Uint hsstart, hslen; Eterm *tp; Eterm type; Binary *bin; Eterm bin_term = NIL; + int runres; + Eterm result; + if (is_not_binary(BIF_ARG_1)) { goto badarg; } @@ -1486,25 +1875,33 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) /* Invalid term, we're called from binary_match_2... */ hsstart = 0; hslen = binary_size(BIF_ARG_1); - } else if (is_tuple(BIF_ARG_3)) { - tp = tuple_val(BIF_ARG_3); - if (arityval(*tp) != 2) { - goto badarg; - } - if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { - goto badarg; - } - if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { - goto badarg; - } - if (hslen < hsstart) { - goto badarg; - } - if (hslen > binary_size(BIF_ARG_1)-1) { - goto badarg; /* XXX:PaN or should we take as much as we have ? */ + } else if (is_list(BIF_ARG_3)) { + Eterm l = BIF_ARG_3; + while(is_list(l)) { + Eterm t = CAR(list_val(l)); + if (!is_tuple(t)) { + goto badarg; + } + tp = tuple_val(t); + if (arityval(*tp) != 2) { + goto badarg; + } + if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { + goto badarg; + } + if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { + goto badarg; + } + if (hslen < hsstart) { + goto badarg; + } + if (hslen > binary_size(BIF_ARG_1)-1) { + goto badarg; /* XXX:PaN or should we take as much as we have ? */ + } + hslen = hslen + 1 - hsstart; + l = CDR(list_val(l)); } - hslen = hslen + 1 - hsstart; - } else { + } else if (BIF_ARG_3 != NIL) { goto badarg; } if (hslen == 0) { @@ -1528,94 +1925,120 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { goto badarg; } + runres = do_binary_match(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result); + if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { + Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE+3); + bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); + } else if (bin_term == NIL) { + erts_bin_free(bin); + } + switch (runres) { + case DO_BIN_MATCH_OK: + BIF_RET(result); + case DO_BIN_MATCH_RESTART: + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, bin_term); + default: + goto badarg; + } + badarg: + BIF_ERROR(BIF_P,BADARG); +} - if (type == am_bm) { - BMData *bm; - Sint pos; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret; - Eterm *hp; - bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_bm_data(bm); -#endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - pos = bm_find_first_match(bm, bytes + hsstart, hslen); - if (pos < 0) { - ret = am_nomatch; - } else { - Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); - ret = erts_make_integer(pos,BIF_P); - if (bm->ret_tuple) { - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, ret, erlen); - } - } - erts_free_aligned_binary_bytes(temp_alloc); - if (bin_term == NIL) { - erts_bin_free(bin); - } - BIF_RET(ret); - } else if (type == am_ac) { - ACTrie *act; - Uint rlen; - Sint i; - FindallData *fad; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret,tpl; - Eterm *hp; +BIF_RETTYPE binary_matches_3(BIF_ALIST_3) +{ + Uint hsstart, hslen; + Eterm *tp; + Eterm type; + Binary *bin; + Eterm bin_term = NIL; + int runres; + Eterm result; - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_ac_trie(act); -#endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - rlen = ac_find_all_non_overlapping(act, bytes + hsstart, - hslen, &fad); - if (rlen == 0) { - ret = am_nomatch; - } else { - for (i = 0; i < rlen; ++i) { - fad[i].epos = erts_make_integer(fad[i].pos,BIF_P); - fad[i].elen = erts_make_integer(fad[i].len,BIF_P); + if (is_not_binary(BIF_ARG_1)) { + goto badarg; + } + if (BIF_ARG_3 == ((Eterm) 0)) { + /* Invalid term, we're called from binary_matches_2... */ + hsstart = 0; + hslen = binary_size(BIF_ARG_1); + } else if (is_list(BIF_ARG_3)) { + Eterm l = BIF_ARG_3; + while(is_list(l)) { + Eterm t = CAR(list_val(l)); + if (!is_tuple(t)) { + goto badarg; } - hp = HAlloc(BIF_P,rlen * (3 + 2)); - ret = NIL; - for (i = rlen - 1; i >= 0; --i) { - tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); - hp +=3; - ret = CONS(hp,tpl,ret); - hp += 2; + tp = tuple_val(t); + if (arityval(*tp) != 2) { + goto badarg; + } + if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { + goto badarg; } + if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { + goto badarg; + } + if (hslen < hsstart) { + goto badarg; + } + if (hslen > binary_size(BIF_ARG_1)-1) { + goto badarg; /* XXX:PaN or should we take as much as we have ? */ + } + hslen = hslen + 1 - hsstart; + l = CDR(list_val(l)); } - erts_free_aligned_binary_bytes(temp_alloc); - if (fad != NULL) { - erts_free(ERTS_ALC_T_TMP,fad); + } else if (BIF_ARG_3 != NIL) { + goto badarg; + } + if (hslen == 0) { + BIF_RET(am_nomatch); + } + if (is_tuple(BIF_ARG_2)) { + tp = tuple_val(BIF_ARG_2); + if (arityval(*tp) != 2 || is_not_atom(tp[1])) { + goto badarg; } - if (bin_term == NIL) { - erts_bin_free(bin); + if (((tp[1] != am_bm) && (tp[1] != am_ac)) || + !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + goto badarg; } - BIF_RET(ret); + type = tp[1]; + bin = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { + goto badarg; + } + bin_term = tp[2]; + } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { + goto badarg; + } + runres = do_binary_matches(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result); + if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { + Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE+3); + bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); + } else if (bin_term == NIL) { + erts_bin_free(bin); + } + switch (runres) { + case DO_BIN_MATCH_OK: + BIF_RET(result); + case DO_BIN_MATCH_RESTART: + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, bin_term); + default: + goto badarg; } badarg: BIF_ERROR(BIF_P,BADARG); } + + +BIF_RETTYPE binary_match_2(BIF_ALIST_2) +{ + return binary_match_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); +} + + BIF_RETTYPE binary_matches_2(BIF_ALIST_2) { return binary_matches_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); -- cgit v1.2.3 From f06f499690ef1f5c8659128095a82d6c9b834d68 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Tue, 20 Apr 2010 17:32:48 +0200 Subject: Add random compare testcase Fix heap-hole when trapping in binary.c Fix boyer more segfaulting when searchstring is longer than haystack --- erts/emulator/beam/binary.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'erts/emulator/beam/binary.c') diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 2b110e8b82..f51815d615 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -782,7 +782,6 @@ typedef struct _ac_trie { } ACTrie; typedef struct _bm_data { - int ret_tuple; byte *x; Sint len; Sint *goodshift; @@ -868,7 +867,6 @@ static BMData *create_bmdata(MyAllocator *my, byte *x, Uint len, Binary **the_bi memcpy(bmd->x,x,len); bmd->len = len; bmd->goodshift = my_alloc(my,sizeof(Uint) * len); - bmd->ret_tuple = 0; *the_bin = mb; return bmd; } @@ -1304,7 +1302,7 @@ static void compute_goodshifts(BMData *bmd) typedef struct { Sint pos; - Uint len; + Sint len; } BMFindFirstState; #define BM_OK 0 /* used only for find_all */ @@ -1315,14 +1313,14 @@ typedef struct { static void bm_init_find_first_match(BMFindFirstState *state, Sint startpos, Uint len) { state->pos = startpos; - state->len = len; + state->len = (Sint) len; } static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *haystack, Uint reductions) { Sint blen = bmd->len; - Uint len = state->len; + Sint len = state->len; Sint *gs = bmd->goodshift; Sint *bs = bmd->badshift; byte *needle = bmd->x; @@ -1347,7 +1345,7 @@ static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *hays typedef struct { Sint pos; - Uint len; + Sint len; Uint m; Uint allocated; FindallData *out; @@ -1356,7 +1354,7 @@ typedef struct { static void bm_init_find_all(BMFindAllState *state, Sint startpos, Uint len) { state->pos = startpos; - state->len = len; + state->len = (Sint) len; state->m = 0; state->allocated = 0; state->out = NULL; @@ -1396,7 +1394,7 @@ static Sint bm_find_all_non_overlapping(BMFindAllState *state, BMData *bmd, byte *haystack, Uint reductions) { Sint blen = bmd->len; - Uint len = state->len; + Sint len = state->len; Sint *gs = bmd->goodshift; Sint *bs = bmd->badshift; byte *needle = bmd->x; @@ -1454,13 +1452,11 @@ static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp) Eterm t, b, comp_term = NIL; Uint characters; Uint words; - int return_tuple = 0; characters = 0; words = 0; if (is_list(argument)) { - return_tuple = 1; t = argument; while (is_list(t)) { b = CAR(list_val(t)); @@ -1509,7 +1505,6 @@ static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp) bytes = erts_get_aligned_binary_bytes(comp_term, &temp_alloc); } bmd = create_bmdata(&my, bytes, characters, &bin); - bmd->ret_tuple = return_tuple; compute_badshifts(bmd); compute_goodshifts(bmd); erts_free_aligned_binary_bytes(temp_alloc); @@ -1625,12 +1620,10 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, erts_free_aligned_binary_bytes(temp_alloc); return DO_BIN_MATCH_RESTART; } else { + Eterm erlen = erts_make_integer((Uint) bm->len, p); ret = erts_make_integer(pos,p); - if (bm->ret_tuple) { - Eterm erlen = erts_make_integer((Uint) bm->len, p); - hp = HAlloc(p,3); - ret = TUPLE2(hp, ret, erlen); - } + hp = HAlloc(p,3); + ret = TUPLE2(hp, ret, erlen); } erts_free_aligned_binary_bytes(temp_alloc); *res_term = ret; @@ -1927,7 +1920,7 @@ BIF_RETTYPE binary_match_3(BIF_ALIST_3) } runres = do_binary_match(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result); if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { - Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE+3); + Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE); bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); } else if (bin_term == NIL) { erts_bin_free(bin); @@ -2014,7 +2007,7 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) } runres = do_binary_matches(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result); if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { - Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE+3); + Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE); bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); } else if (bin_term == NIL) { erts_bin_free(bin); @@ -2054,7 +2047,6 @@ static void dump_bm_data(BMData *bm) int i,j; erts_printf("Dumping Boyer-More structure.\n"); erts_printf("=============================\n"); - erts_printf("Return tuple: %d\n",bm->ret_tuple); erts_printf("Searchstring [%ld]:\n", bm->len); erts_printf("<<"); for (i = 0; i < bm->len; ++i) { -- cgit v1.2.3 From e27516edd537045e1151dc8a95c821ba18aadf4f Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 21 Apr 2010 16:37:16 +0200 Subject: Count reductions for process even when not trapping Set loop factors to 10. Teach erts_debug:set_internal_state to limit loop factor for binary. Add random tests for matches and match with multiple searchstrings. --- erts/emulator/beam/binary.c | 80 +++++++++++++++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 20 deletions(-) (limited to 'erts/emulator/beam/binary.c') diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index f51815d615..6b6571a1b2 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -695,6 +695,9 @@ static Export binary_match_trap_export; static BIF_RETTYPE binary_match_trap(BIF_ALIST_3); static Export binary_matches_trap_export; static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3); +static Uint max_loop_limit; + + void erts_init_bif_binary(void) { sys_memset((void *) &binary_match_trap_export, 0, sizeof(Export)); @@ -713,9 +716,31 @@ void erts_init_bif_binary(void) binary_matches_trap_export.code[3] = (BeamInstr) em_apply_bif; binary_matches_trap_export.code[4] = (BeamInstr) &binary_matches_trap; + max_loop_limit = 0; return; } +Sint erts_binary_set_loop_limit(Sint limit) +{ + Sint save = (Sint) max_loop_limit; + if (limit <= 0) { + max_loop_limit = 0; + } else { + max_loop_limit = (Uint) limit; + } + return save; +} + +static Uint get_reds(Process *p, int loop_factor) +{ + Uint reds = ERTS_BIF_REDS_LEFT(p) * loop_factor; + Uint tmp = max_loop_limit; + if (tmp != 0 && tmp < reds) { + return tmp; + } + return reds; +} + #define MYALIGN(Size) (SIZEOF_VOID_P * (((Size) / SIZEOF_VOID_P) + \ !!(((Size) % SIZEOF_VOID_P)))) @@ -979,6 +1004,7 @@ static void ac_compute_failure_functions(ACTrie *act, ACNode **qbuff) root->h = root; } + /* * The actual searching for needles in the haystack... * Find first match using Aho-Coracick Trie @@ -1009,10 +1035,10 @@ static void ac_init_find_first_match(ACFindFirstState *state, ACTrie *act, Sint #define AC_NOT_FOUND -1 #define AC_RESTART -2 -#define AC_LOOP_FACTOR 1 +#define AC_LOOP_FACTOR 10 static int ac_find_first_match(ACFindFirstState *state, byte *haystack, - Uint *mpos, Uint *mlen, Uint reductions) + Uint *mpos, Uint *mlen, Uint *reductions) { ACNode *q = state->q; Uint i = state->pos; @@ -1020,7 +1046,7 @@ static int ac_find_first_match(ACFindFirstState *state, byte *haystack, Uint len = state->len; Uint candidate_start = state->candidate_start; Uint rstart; - register Uint reds = (Uint) reductions; + register Uint reds = *reductions; while (i < len) { if (--reds == 0) { @@ -1057,6 +1083,7 @@ static int ac_find_first_match(ACFindFirstState *state, byte *haystack, } } } + *reductions = reds; if (!candidate) { return AC_NOT_FOUND; } @@ -1127,7 +1154,8 @@ static void ac_clean_find_all(ACFindAllState *state) * Differs to the find_first function in that it stores all matches and the values * arte returned only in the state. */ -static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, Uint reductions) +static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, + Uint *reductions) { ACNode *q = state->q; Uint i = state->pos; @@ -1137,7 +1165,7 @@ static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, Ui Uint m = state->m, save_m; Uint allocated = state->allocated; FindallData *out = state->out; - register Uint reds = (Uint) reductions; + register Uint reds = *reductions; while (i < len) { @@ -1212,6 +1240,7 @@ static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, Ui } } } + *reductions = reds; state->m = m; state->out = out; return (m == 0) ? AC_NOT_FOUND : AC_OK; @@ -1308,7 +1337,7 @@ typedef struct { #define BM_OK 0 /* used only for find_all */ #define BM_NOT_FOUND -1 #define BM_RESTART -2 -#define BM_LOOP_FACTOR 1 +#define BM_LOOP_FACTOR 10 /* Should we have a higher value? */ static void bm_init_find_first_match(BMFindFirstState *state, Sint startpos, Uint len) { @@ -1317,7 +1346,7 @@ static void bm_init_find_first_match(BMFindFirstState *state, Sint startpos, Uin } -static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *haystack, Uint reductions) +static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *haystack, Uint *reductions) { Sint blen = bmd->len; Sint len = state->len; @@ -1326,7 +1355,7 @@ static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *hays byte *needle = bmd->x; Sint i; Sint j = state->pos; - register Uint reds = reductions; + register Uint reds = *reductions; while (j <= len - blen) { if (--reds == 0) { @@ -1336,10 +1365,12 @@ static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *hays for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) ; if (i < 0) { /* found */ + *reductions = reds; return j; } j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); } + *reductions = reds; return BM_NOT_FOUND; } @@ -1391,7 +1422,7 @@ static void bm_clean_find_all(BMFindAllState *state) * arte returned only in the state. */ static Sint bm_find_all_non_overlapping(BMFindAllState *state, - BMData *bmd, byte *haystack, Uint reductions) + BMData *bmd, byte *haystack, Uint *reductions) { Sint blen = bmd->len; Sint len = state->len; @@ -1403,7 +1434,7 @@ static Sint bm_find_all_non_overlapping(BMFindAllState *state, Uint m = state->m; Uint allocated = state->allocated; FindallData *out = state->out; - register Uint reds = reductions; + register Uint reds = *reductions; while (j <= len - blen) { if (--reds == 0) { @@ -1436,6 +1467,7 @@ static Sint bm_find_all_non_overlapping(BMFindAllState *state, } state->m = m; state->out = out; + *reductions = reds; return (m == 0) ? BM_NOT_FOUND : BM_OK; } @@ -1588,7 +1620,8 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, Eterm ret; Eterm *hp; BMFindFirstState state; - Uint reds = ERTS_BIF_REDS_LEFT(p) * BM_LOOP_FACTOR; + Uint reds = get_reds(p, BM_LOOP_FACTOR); + Uint save_reds = reds; bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG @@ -1603,7 +1636,7 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, #ifdef HARDDEBUG erts_printf("(bm) state->pos = %ld, state->len = %lu\n",state.pos, state.len); #endif - pos = bm_find_first_match(&state, bm, bytes, reds); + pos = bm_find_first_match(&state, bm, bytes, &reds); if (pos == BM_NOT_FOUND) { ret = am_nomatch; } else if (pos == BM_RESTART) { @@ -1626,6 +1659,7 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, ret = TUPLE2(hp, ret, erlen); } erts_free_aligned_binary_bytes(temp_alloc); + BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); *res_term = ret; return DO_BIN_MATCH_OK; } else if (type == am_ac) { @@ -1635,7 +1669,8 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, ACFindFirstState state; Eterm ret; Eterm *hp; - Uint reds = ERTS_BIF_REDS_LEFT(p) * AC_LOOP_FACTOR; + Uint reds = get_reds(p, AC_LOOP_FACTOR); + Uint save_reds = reds; act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG @@ -1647,7 +1682,7 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, Eterm *ptr = big_val(state_term); memcpy(&state,ptr+2,sizeof(state)); } - acr = ac_find_first_match(&state, bytes, &pos, &rlen, reds); + acr = ac_find_first_match(&state, bytes, &pos, &rlen, &reds); if (acr == AC_NOT_FOUND) { ret = am_nomatch; } else if (acr == AC_RESTART) { @@ -1670,6 +1705,7 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, ret = TUPLE2(hp, epos, erlen); } erts_free_aligned_binary_bytes(temp_alloc); + BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); *res_term = ret; return DO_BIN_MATCH_OK; } @@ -1702,7 +1738,8 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen Eterm ret,tpl; Eterm *hp; BMFindAllState state; - Uint reds = ERTS_BIF_REDS_LEFT(p) * BM_LOOP_FACTOR; + Uint reds = get_reds(p, BM_LOOP_FACTOR); + Uint save_reds = reds; bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG @@ -1715,9 +1752,9 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen bm_restore_find_all(&state,(char *) (ptr+2)); } - pos = bm_find_all_non_overlapping(&state, bm, bytes, reds); + pos = bm_find_all_non_overlapping(&state, bm, bytes, &reds); if (pos == BM_NOT_FOUND) { - ret = am_nomatch; + ret = NIL; } else if (pos == BM_RESTART) { int x = (SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + !!(SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); @@ -1750,6 +1787,7 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen } erts_free_aligned_binary_bytes(temp_alloc); bm_clean_find_all(&state); + BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); *res_term = ret; return DO_BIN_MATCH_OK; } else if (type == am_ac) { @@ -1758,7 +1796,8 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen ACFindAllState state; Eterm ret,tpl; Eterm *hp; - Uint reds = ERTS_BIF_REDS_LEFT(p) * AC_LOOP_FACTOR; + Uint reds = get_reds(p, AC_LOOP_FACTOR); + Uint save_reds = reds; act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG @@ -1770,9 +1809,9 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen Eterm *ptr = big_val(state_term); ac_restore_find_all(&state,(char *) (ptr+2)); } - acr = ac_find_all_non_overlapping(&state, bytes, reds); + acr = ac_find_all_non_overlapping(&state, bytes, &reds); if (acr == AC_NOT_FOUND) { - ret = am_nomatch; + ret = NIL; } else if (acr == AC_RESTART) { int x = (SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + !!(SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); @@ -1805,6 +1844,7 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen } erts_free_aligned_binary_bytes(temp_alloc); ac_clean_find_all(&state); + BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); *res_term = ret; return DO_BIN_MATCH_OK; } -- cgit v1.2.3 From 8e8e10d9d080655edba6dedbc13d9e729f209e2e Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 21 Apr 2010 17:18:41 +0200 Subject: Move binary module bif's to erl_bif_binary.c --- erts/emulator/beam/binary.c | 1467 ------------------------------------------- 1 file changed, 1467 deletions(-) (limited to 'erts/emulator/beam/binary.c') diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 6b6571a1b2..7ca3eb686d 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -676,1470 +676,3 @@ bitstr_list_len(Eterm obj) return (Sint) -1; } -/* - * The native implementation functions for the module binary. - * Searching is implemented using aither Boyer-More or Aho-Corasick - * depending on number of searchstrings (BM if one, AC if more than one). - * Native implementation is mostly for efficiency, nothing (except binary:referenced_byte_size) - * really *needs* to be implemented in native code. - */ - -/* #define HARDDEBUG */ - -/* - * A micro allocator used when building search structures, just a convenience - * for building structures inside a pre alocated magic binary using conventional - * malloc-like interface. - */ -static Export binary_match_trap_export; -static BIF_RETTYPE binary_match_trap(BIF_ALIST_3); -static Export binary_matches_trap_export; -static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3); -static Uint max_loop_limit; - - -void erts_init_bif_binary(void) -{ - sys_memset((void *) &binary_match_trap_export, 0, sizeof(Export)); - binary_match_trap_export.address = &binary_match_trap_export.code[3]; - binary_match_trap_export.code[0] = am_erlang; - binary_match_trap_export.code[1] = am_binary_match_trap; - binary_match_trap_export.code[2] = 3; - binary_match_trap_export.code[3] = (BeamInstr) em_apply_bif; - binary_match_trap_export.code[4] = (BeamInstr) &binary_match_trap; - - sys_memset((void *) &binary_matches_trap_export, 0, sizeof(Export)); - binary_matches_trap_export.address = &binary_matches_trap_export.code[3]; - binary_matches_trap_export.code[0] = am_erlang; - binary_matches_trap_export.code[1] = am_binary_matches_trap; - binary_matches_trap_export.code[2] = 3; - binary_matches_trap_export.code[3] = (BeamInstr) em_apply_bif; - binary_matches_trap_export.code[4] = (BeamInstr) &binary_matches_trap; - - max_loop_limit = 0; - return; -} - -Sint erts_binary_set_loop_limit(Sint limit) -{ - Sint save = (Sint) max_loop_limit; - if (limit <= 0) { - max_loop_limit = 0; - } else { - max_loop_limit = (Uint) limit; - } - return save; -} - -static Uint get_reds(Process *p, int loop_factor) -{ - Uint reds = ERTS_BIF_REDS_LEFT(p) * loop_factor; - Uint tmp = max_loop_limit; - if (tmp != 0 && tmp < reds) { - return tmp; - } - return reds; -} - -#define MYALIGN(Size) (SIZEOF_VOID_P * (((Size) / SIZEOF_VOID_P) + \ - !!(((Size) % SIZEOF_VOID_P)))) - -#ifdef DEBUG -#define CHECK_ALLOCATOR(My) ASSERT((My).current <= ((My).mem + (My).size)) -#else -#define CHECK_ALLOCATOR(My) /* nothing */ -#endif - -typedef struct _my_allocator { - Uint size; - byte *current; - byte *mem; -} MyAllocator; - -static void init_my_allocator(MyAllocator *my, Uint siz, byte *array) -{ - ASSERT((siz % SIZEOF_VOID_P) == 0); - my->size = siz; - my->mem = array; - my->current = my->mem; -} - -static void *my_alloc(MyAllocator *my, Uint size) -{ - void *ptr = my->current; - my->current += MYALIGN(size); - return ptr; -} - -/* - * The search functionality. - * - * The search is byte oriented, which works nicely for UTF-8 as well as latin1 data - */ - -#define ALPHABET_SIZE 256 - -typedef struct _ac_node { -#ifdef HARDDEBUG - Uint32 id; /* To identify h pointer targets when dumping */ -#endif - Uint32 d; /* Depth in trie, also represents the length - (-1) of the matched string if in - final set */ - Sint32 final; /* Members in final set represent matches. - * The set representation is scattered - * among the nodes in this way: - * >0 -> this represents a member of - * the final set, <0 -> member of - * final set somewhere in the failure chain, - * 0 -> not member of the final set */ - struct _ac_node *h; /* h(Hode) is the failure function */ - struct _ac_node *g[ALPHABET_SIZE]; /* g(Node,Character) is the - transition function */ -} ACNode; - -typedef struct _ac_trie { -#ifdef HARDDEBUG - Uint32 idc; -#endif - Uint32 counter; /* Number of added patterns */ - ACNode *root; /* pointer to the root state */ -} ACTrie; - -typedef struct _bm_data { - byte *x; - Sint len; - Sint *goodshift; - Sint badshift[ALPHABET_SIZE]; -} BMData; - -#ifdef HARDDEBUG -static void dump_bm_data(BMData *bm); -static void dump_ac_trie(ACTrie *act); -static void dump_ac_node(ACNode *node, int indent, int ch); -#endif - -/* - * The needed size of binary data for a search structure - given the accumulated - * string lengths. - */ -#define BM_SIZE(StrLen) /* StrLen: length of searchstring */ \ -((MYALIGN(sizeof(Sint) * (StrLen))) + /* goodshift array */ \ - MYALIGN(StrLen) + /* searchstring saved */ \ - (MYALIGN(sizeof(BMData)))) /* Structure */ - -#define AC_SIZE(StrLens) /* StrLens: sum of all searchstring lengths */ \ -((MYALIGN(sizeof(ACNode)) * \ -((StrLens)+1)) + /* The actual nodes (including rootnode) */ \ - MYALIGN(sizeof(ACTrie))) /* Structure */ - - -#ifndef MAX -#define MAX(A,B) (((A) > (B)) ? (A) : B) -#endif - -/* - * Callback for the magic binary - */ -static void cleanup_my_data(Binary *bp) -{ - return; -} - -/* - * Initiate a (allocated) micro allocator and fill in the base - * for an Aho-Corasick search trie, given the accumulated length of the search strings. - */ -static ACTrie *create_acdata(MyAllocator *my, Uint len, - ACNode ***qbuff /* out */,Binary **the_bin /* out */) -{ - Uint datasize = AC_SIZE(len); - ACTrie *act; - ACNode *acn; - Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); - byte *data = ERTS_MAGIC_BIN_DATA(mb); - - init_my_allocator(my, datasize, data); - act = my_alloc(my, sizeof(ACTrie)); /* Important that this is the first - allocation */ - act->counter = 0; - act->root = acn = my_alloc(my, sizeof(ACNode)); - acn->d = 0; - acn->final = 0; - acn->h = NULL; - memset(acn->g, 0, sizeof(ACNode *) * ALPHABET_SIZE); -#ifdef HARDDEBUG - act->idc = 0; - acn->id = 0; -#endif - *qbuff = erts_alloc(ERTS_ALC_T_TMP, sizeof(ACNode *) * len); - *the_bin = mb; - return act; -} - -/* - * The same initialization of allocator and basic data for Boyer-More. - */ -static BMData *create_bmdata(MyAllocator *my, byte *x, Uint len, Binary **the_bin /* out */) -{ - Uint datasize = BM_SIZE(len); - BMData *bmd; - Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); - byte *data = ERTS_MAGIC_BIN_DATA(mb); - init_my_allocator(my, datasize, data); - bmd = my_alloc(my, sizeof(BMData)); - bmd->x = my_alloc(my,len); - memcpy(bmd->x,x,len); - bmd->len = len; - bmd->goodshift = my_alloc(my,sizeof(Uint) * len); - *the_bin = mb; - return bmd; -} - -/* - * Compilation of search structures - */ - -/* - * Aho Corasick - Build a Trie and fill in the failure functions - * when all strings are added. - * The algorithm is nicely described by Dieter Bühler of University of Tübingen: - * http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html - */ - -/* - * Helper called ance for each search pattern - */ -static void ac_add_one_pattern(MyAllocator *my, ACTrie *act, byte *x, Uint len) -{ - ACNode *acn = act->root; - Uint32 n = ++act->counter; /* Always increase conter, even if it's a duplicate - as this shall identify the pattern in the - final set and eventually be returned to - the caller (in Erlang) */ - Uint i = 0; - - while(i < len) { - if (acn->g[x[i]] != NULL) { - /* node exists, continue */ - acn = acn->g[x[i]]; - ++i; - } else { - /* allocate a new node */ - ACNode *nn = my_alloc(my,sizeof(ACNode)); -#ifdef HARDDEBUG - nn->id = ++(act->idc); -#endif - nn->d = i+1; - nn->h = act->root; - nn->final = 0; - memset(nn->g, 0, sizeof(ACNode *) * ALPHABET_SIZE); - acn->g[x[i]] = nn; - ++i; - acn = nn; - } - } - if (acn->final == 0) { /* New pattern, add to final set */ - acn->final = n; - } -} - -/* - * Called when all search patterns are added. - */ -static void ac_compute_failure_functions(ACTrie *act, ACNode **qbuff) -{ - ACNode *root = act->root; - ACNode *parent; - int i; - int qh = 0,qt = 0; - ACNode *child, *r; - - /* Set all children of the root to have the root as failure function */ - for (i = 0; i < ALPHABET_SIZE; ++i) { - if (root->g[i] != NULL) { - root->g[i]->h = root; - /* Add to que for later traversal */ - qbuff[qt++] = root->g[i]; - } - } - - /* So, now we've handled children of the root state, traverse the - rest of the trie BF... */ - while (qh < qt) { - parent = qbuff[qh++]; - for (i = 0; i < ALPHABET_SIZE; ++ i) { - if ((child = parent->g[i]) != NULL) { - /* Visit this node to */ - qbuff[qt++] = child; - /* Search for correct failure function, follow the parents failure - function until you find a similar transition funtion to this - childs */ - r = parent->h; - while (r != NULL && r->g[i] == NULL) { - r = r->h; - } - if (r == NULL) { - /* Replace NULL failures with the root as we go */ - child->h = (root->g[i] == NULL) ? root : root->g[i]; - } else { - child->h = r->g[i]; - /* - * The "final" set is scattered among the nodes. When - * the failure function points to a member of the final set, - * we have a match, but we might not see it in the current node - * if we dont mark it as a special type of final, i.e. foolow - * the failure function and you will find a real member of final - * set. This is marked with a negative string id and only done if - * this node does not represent a member in the final set. - */ - if (!(child->final) && (child->h->final)) { - child->final = -1; - } - } - } - } - } - /* Finally the failure function of the root should point to itself */ - root->h = root; -} - - -/* - * The actual searching for needles in the haystack... - * Find first match using Aho-Coracick Trie - * return pattern number and fill in mpos + mlen if found, otherwise return 0 - * Return the matching pattern that *starts* first, and ends - * last (difference when overlapping), hence the candidate thing. - * Basic AC finds the first end before the first start... - * - */ -typedef struct { - ACNode *q; - Uint pos; - Uint len; - ACNode *candidate; - Uint candidate_start; -} ACFindFirstState; - - -static void ac_init_find_first_match(ACFindFirstState *state, ACTrie *act, Sint startpos, Uint len) -{ - state->q = act->root; - state->pos = startpos; - state->len = len; - state->candidate = NULL; - state->candidate_start = 0; -} -#define AC_OK 0 -#define AC_NOT_FOUND -1 -#define AC_RESTART -2 - -#define AC_LOOP_FACTOR 10 - -static int ac_find_first_match(ACFindFirstState *state, byte *haystack, - Uint *mpos, Uint *mlen, Uint *reductions) -{ - ACNode *q = state->q; - Uint i = state->pos; - ACNode *candidate = state->candidate, *r; - Uint len = state->len; - Uint candidate_start = state->candidate_start; - Uint rstart; - register Uint reds = *reductions; - - while (i < len) { - if (--reds == 0) { - state->q = q; - state->pos = i; - state->len = len; - state->candidate = candidate; - state->candidate_start = candidate_start; - return AC_RESTART; - } - - while (q->g[haystack[i]] == NULL && q->h != q) { - q = q->h; - } - if (q->g[haystack[i]] != NULL) { - q = q->g[haystack[i]]; - } -#ifdef HARDDEBUG - erts_printf("ch = %c, Current: %u\n", (int) haystack[i], (unsigned) q->id); -#endif - ++i; - if (candidate != NULL && (i - q->d) > candidate_start) { - break; - } - if (q->final) { - r = q; - while (r->final < 0) - r = r->h; - rstart = i - r->d; - if (candidate == NULL || rstart < candidate_start || - (rstart == candidate_start && candidate->d < q->d)) { - candidate_start = rstart; - candidate = r; - } - } - } - *reductions = reds; - if (!candidate) { - return AC_NOT_FOUND; - } -#ifdef HARDDEBUG - dump_ac_node(candidate,0,'?'); -#endif - *mpos = candidate_start; - *mlen = candidate->d; - return AC_OK; -} - -typedef struct _findall_data { - Uint pos; - Uint len; -#ifdef HARDDEBUG - Uint id; -#endif - Eterm epos; - Eterm elen; -} FindallData; - -typedef struct { - ACNode *q; - Uint pos; - Uint len; - Uint m; - Uint allocated; - FindallData *out; -} ACFindAllState; - -static void ac_init_find_all(ACFindAllState *state, ACTrie *act, Sint startpos, Uint len) -{ - state->q = act->root; - state->pos = startpos; - state->len = len; - state->m = 0; - state->allocated = 0; - state->out = NULL; -} - -static void ac_restore_find_all(ACFindAllState *state, char *buff) -{ - memcpy(state,buff,sizeof(ACFindAllState)); - state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated)); - memcpy(state->out,buff+sizeof(ACFindAllState),sizeof(FindallData)*state->m); -} - -static void ac_serialize_find_all(ACFindAllState *state, char *buff) -{ - memcpy(buff,state,sizeof(ACFindAllState)); - memcpy(buff+sizeof(ACFindAllState),state->out,sizeof(FindallData)*state->m); -} - -static void ac_clean_find_all(ACFindAllState *state) -{ - if (state->out != NULL) { - erts_free(ERTS_ALC_T_TMP, state->out); - } -#ifdef HARDDEBUG - state->out = NULL; - state->allocated = 0; -#endif -} - -#define SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(S) (sizeof(ACFindAllState)+(sizeof(FindallData)*(S).m)) - -/* - * Differs to the find_first function in that it stores all matches and the values - * arte returned only in the state. - */ -static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, - Uint *reductions) -{ - ACNode *q = state->q; - Uint i = state->pos; - Uint rstart; - ACNode *r; - Uint len = state->len; - Uint m = state->m, save_m; - Uint allocated = state->allocated; - FindallData *out = state->out; - register Uint reds = *reductions; - - - while (i < len) { - if (--reds == 0) { - state->q = q; - state->pos = i; - state->len = len; - state->m = m; - state->allocated = allocated; - state->out = out; - return AC_RESTART; - } - while (q->g[haystack[i]] == NULL && q->h != q) { - q = q->h; - } - if (q->g[haystack[i]] != NULL) { - q = q->g[haystack[i]]; - } - ++i; - if (q->final) { - r = q; - while (r->final) { - while (r->final < 0) - r = r->h; -#ifdef HARDDEBUG - erts_printf("Trying to add %u\n",(unsigned) r->final); -#endif - rstart = i - r->d; - save_m = m; - while (m > 0 && (out[m-1].pos > rstart || - (out[m-1].pos == rstart && - out[m-1].len < r->d))) { -#ifdef HARDDEBUG - erts_printf("Popping %u\n",(unsigned) out[m-1].id); -#endif - --m; - } -#ifdef HARDDEBUG - if (m > 0) { - erts_printf("Pos %u\n",out[m-1].pos); - erts_printf("Len %u\n",out[m-1].len); - } - erts_printf("Rstart %u\n",rstart); -#endif - if (m == 0 || out[m-1].pos + out[m-1].len <= rstart) { - if (m >= allocated) { - if (!allocated) { - allocated = 10; - out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * allocated); - } else { - allocated *= 2; - out = erts_realloc(ERTS_ALC_T_TMP, out, - sizeof(FindallData) * allocated); - } - } - out[m].pos = rstart; - out[m].len = r->d; -#ifdef HARDDEBUG - out[m].id = r->final; -#endif - ++m; -#ifdef HARDDEBUG - erts_printf("Pushing %u\n",(unsigned) out[m-1].id); -#endif - } else { -#ifdef HARDDEBUG - erts_printf("Backtracking %d steps\n",save_m - m); -#endif - m = save_m; - } - r = r->h; - } - } - } - *reductions = reds; - state->m = m; - state->out = out; - return (m == 0) ? AC_NOT_FOUND : AC_OK; -} - -/* - * Boyer More - most obviously implemented more or less exactly as Christian Charras - * and Thierry Lecroq describes it in "Handbook of Exact String-Matching Algorithms" - * http://www-igm.univ-mlv.fr/~lecroq/string/ - */ - -/* - * Call this to compute badshifts array - */ -static void compute_badshifts(BMData *bmd) -{ - Sint i; - Sint m = bmd->len; - - for (i = 0; i < ALPHABET_SIZE; ++i) { - bmd->badshift[i] = m; - } - for (i = 0; i < m - 1; ++i) { - bmd->badshift[bmd->x[i]] = m - i - 1; - } -} - -/* Helper for "compute_goodshifts" */ -static void compute_suffixes(byte *x, Sint m, Sint *suffixes) -{ - int f,g,i; - - suffixes[m - 1] = m; - - f = 0; /* To avoid use before set warning */ - - g = m - 1; - - for (i = m - 2; i >= 0; --i) { - if (i > g && suffixes[i + m - f] < i - g) { - suffixes[i] = suffixes[i + m - 1 - f]; - } else { - if (i < g) { - g = i; - } - f = i; - while ( g >= 0 && x[g] == x[g + m - 1 - f] ) { - --g; - } - suffixes[i] = f - g; - } - } -} - -/* - * Call this to compute goodshift array - */ -static void compute_goodshifts(BMData *bmd) -{ - Sint m = bmd->len; - byte *x = bmd->x; - Sint i, j; - Sint *suffixes = erts_alloc(ERTS_ALC_T_TMP, m * sizeof(Uint)); - - compute_suffixes(x, m, suffixes); - - for (i = 0; i < m; ++i) { - bmd->goodshift[i] = m; - } - - j = 0; - - for (i = m - 1; i >= -1; --i) { - if (i == -1 || suffixes[i] == i + 1) { - while (j < m - 1 - i) { - if (bmd->goodshift[j] == m) { - bmd->goodshift[j] = m - 1 - i; - } - ++j; - } - } - } - for (i = 0; i <= m - 2; ++i) { - bmd->goodshift[m - 1 - suffixes[i]] = m - 1 - i; - } - erts_free(ERTS_ALC_T_TMP, suffixes); -} - -typedef struct { - Sint pos; - Sint len; -} BMFindFirstState; - -#define BM_OK 0 /* used only for find_all */ -#define BM_NOT_FOUND -1 -#define BM_RESTART -2 -#define BM_LOOP_FACTOR 10 /* Should we have a higher value? */ - -static void bm_init_find_first_match(BMFindFirstState *state, Sint startpos, Uint len) -{ - state->pos = startpos; - state->len = (Sint) len; -} - - -static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *haystack, Uint *reductions) -{ - Sint blen = bmd->len; - Sint len = state->len; - Sint *gs = bmd->goodshift; - Sint *bs = bmd->badshift; - byte *needle = bmd->x; - Sint i; - Sint j = state->pos; - register Uint reds = *reductions; - - while (j <= len - blen) { - if (--reds == 0) { - state->pos = j; - return BM_RESTART; - } - for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) - ; - if (i < 0) { /* found */ - *reductions = reds; - return j; - } - j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); - } - *reductions = reds; - return BM_NOT_FOUND; -} - -typedef struct { - Sint pos; - Sint len; - Uint m; - Uint allocated; - FindallData *out; -} BMFindAllState; - -static void bm_init_find_all(BMFindAllState *state, Sint startpos, Uint len) -{ - state->pos = startpos; - state->len = (Sint) len; - state->m = 0; - state->allocated = 0; - state->out = NULL; -} - -static void bm_restore_find_all(BMFindAllState *state, char *buff) -{ - memcpy(state,buff,sizeof(BMFindAllState)); - state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated)); - memcpy(state->out,buff+sizeof(BMFindAllState),sizeof(FindallData)*state->m); -} - -static void bm_serialize_find_all(BMFindAllState *state, char *buff) -{ - memcpy(buff,state,sizeof(BMFindAllState)); - memcpy(buff+sizeof(BMFindAllState),state->out,sizeof(FindallData)*state->m); -} - -static void bm_clean_find_all(BMFindAllState *state) -{ - if (state->out != NULL) { - erts_free(ERTS_ALC_T_TMP, state->out); - } -#ifdef HARDDEBUG - state->out = NULL; - state->allocated = 0; -#endif -} - -#define SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(S) (sizeof(BMFindAllState)+(sizeof(FindallData)*(S).m)) - -/* - * Differs to the find_first function in that it stores all matches and the values - * arte returned only in the state. - */ -static Sint bm_find_all_non_overlapping(BMFindAllState *state, - BMData *bmd, byte *haystack, Uint *reductions) -{ - Sint blen = bmd->len; - Sint len = state->len; - Sint *gs = bmd->goodshift; - Sint *bs = bmd->badshift; - byte *needle = bmd->x; - Sint i; - Sint j = state->pos; - Uint m = state->m; - Uint allocated = state->allocated; - FindallData *out = state->out; - register Uint reds = *reductions; - - while (j <= len - blen) { - if (--reds == 0) { - state->pos = j; - state->m = m; - state->allocated = allocated; - state->out = out; - return BM_RESTART; - } - for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) - ; - if (i < 0) { /* found */ - if (m >= allocated) { - if (!allocated) { - allocated = 10; - out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * allocated); - } else { - allocated *= 2; - out = erts_realloc(ERTS_ALC_T_TMP, out, - sizeof(FindallData) * allocated); - } - } - out[m].pos = j; - out[m].len = blen; - ++m; - j += blen; - } else { - j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); - } - } - state->m = m; - state->out = out; - *reductions = reds; - return (m == 0) ? BM_NOT_FOUND : BM_OK; -} - -/* - * Interface functions (i.e. "bif's") - */ - -/* - * Search functionality interfaces - */ - -static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp) -{ - Eterm t, b, comp_term = NIL; - Uint characters; - Uint words; - - characters = 0; - words = 0; - - if (is_list(argument)) { - t = argument; - while (is_list(t)) { - b = CAR(list_val(t)); - t = CDR(list_val(t)); - if (!is_binary(b)) { - goto badarg; - } - if (binary_bitsize(b) != 0) { - goto badarg; - } - ++words; - characters += binary_size(b); - } - if (is_not_nil(t)) { - goto badarg; - } - if (words > 1) { - comp_term = argument; - } else { - comp_term = CAR(list_val(argument)); - } - } else if (is_binary(argument)) { - if (binary_bitsize(argument) != 0) { - goto badarg; - } - words = 1; - comp_term = argument; - characters = binary_size(argument); - } - - if (characters == 0) { - goto badarg; - } - ASSERT(words > 0); - - if (words == 1) { - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - MyAllocator my; - BMData *bmd; - Binary *bin; - - ERTS_GET_BINARY_BYTES(comp_term, bytes, bitoffs, bitsize); - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(comp_term, &temp_alloc); - } - bmd = create_bmdata(&my, bytes, characters, &bin); - compute_badshifts(bmd); - compute_goodshifts(bmd); - erts_free_aligned_binary_bytes(temp_alloc); - CHECK_ALLOCATOR(my); - *tag = am_bm; - *binp = bin; - return 0; - } else { - ACTrie *act; - MyAllocator my; - ACNode **qbuff; - Binary *bin; - - act = create_acdata(&my, characters, &qbuff, &bin); - t = comp_term; - while (is_list(t)) { - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - b = CAR(list_val(t)); - t = CDR(list_val(t)); - ERTS_GET_BINARY_BYTES(b, bytes, bitoffs, bitsize); - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(b, &temp_alloc); - } - ac_add_one_pattern(&my,act,bytes,binary_size(b)); - erts_free_aligned_binary_bytes(temp_alloc); - } - ac_compute_failure_functions(act,qbuff); - CHECK_ALLOCATOR(my); - erts_free(ERTS_ALC_T_TMP,qbuff); - *tag = am_ac; - *binp = bin; - return 0; - } - badarg: - return -1; -} - -BIF_RETTYPE binary_compile_pattern_1(BIF_ALIST_1) -{ - Binary *bin; - Eterm tag, ret; - Eterm *hp; - - if (do_binary_match_compile(BIF_ARG_1,&tag,&bin)) { - BIF_ERROR(BIF_P,BADARG); - } - hp = HAlloc(BIF_P, PROC_BIN_SIZE+3); - ret = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); - ret = TUPLE2(hp, tag, ret); - BIF_RET(ret); -} - -#define DO_BIN_MATCH_OK 0 -#define DO_BIN_MATCH_BADARG -1 -#define DO_BIN_MATCH_RESTART -2 - -static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, - Eterm type, Binary *bin, Eterm state_term, Eterm *res_term) -{ - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - - ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); - } - if (state_term != NIL) { - Eterm *ptr = big_val(state_term); - type = ptr[1]; - } - - if (type == am_bm) { - BMData *bm; - Sint pos; - Eterm ret; - Eterm *hp; - BMFindFirstState state; - Uint reds = get_reds(p, BM_LOOP_FACTOR); - Uint save_reds = reds; - - bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_bm_data(bm); -#endif - if (state_term == NIL) { - bm_init_find_first_match(&state, hsstart, hslen); - } else { - Eterm *ptr = big_val(state_term); - memcpy(&state,ptr+2,sizeof(state)); - } -#ifdef HARDDEBUG - erts_printf("(bm) state->pos = %ld, state->len = %lu\n",state.pos, state.len); -#endif - pos = bm_find_first_match(&state, bm, bytes, &reds); - if (pos == BM_NOT_FOUND) { - ret = am_nomatch; - } else if (pos == BM_RESTART) { - int x = (sizeof(BMFindFirstState) / sizeof(Eterm)) + - !!(sizeof(BMFindFirstState) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap bm!\n"); -#endif - hp = HAlloc(p,x+2); - hp[0] = make_pos_bignum_header(x+1); - hp[1] = type; - memcpy(hp+2,&state,sizeof(state)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - return DO_BIN_MATCH_RESTART; - } else { - Eterm erlen = erts_make_integer((Uint) bm->len, p); - ret = erts_make_integer(pos,p); - hp = HAlloc(p,3); - ret = TUPLE2(hp, ret, erlen); - } - erts_free_aligned_binary_bytes(temp_alloc); - BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; - } else if (type == am_ac) { - ACTrie *act; - Uint pos, rlen; - int acr; - ACFindFirstState state; - Eterm ret; - Eterm *hp; - Uint reds = get_reds(p, AC_LOOP_FACTOR); - Uint save_reds = reds; - - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_ac_trie(act); -#endif - if (state_term == NIL) { - ac_init_find_first_match(&state, act, hsstart, hslen); - } else { - Eterm *ptr = big_val(state_term); - memcpy(&state,ptr+2,sizeof(state)); - } - acr = ac_find_first_match(&state, bytes, &pos, &rlen, &reds); - if (acr == AC_NOT_FOUND) { - ret = am_nomatch; - } else if (acr == AC_RESTART) { - int x = (sizeof(state) / sizeof(Eterm)) + - !!(sizeof(BMFindFirstState) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap ac!\n"); -#endif - hp = HAlloc(p,x+2); - hp[0] = make_pos_bignum_header(x+1); - hp[1] = type; - memcpy(hp+2,&state,sizeof(state)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - return DO_BIN_MATCH_RESTART; - } else { - Eterm epos = erts_make_integer(pos+hsstart,p); - Eterm erlen = erts_make_integer(rlen,p); - hp = HAlloc(p,3); - ret = TUPLE2(hp, epos, erlen); - } - erts_free_aligned_binary_bytes(temp_alloc); - BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; - } - badarg: - return DO_BIN_MATCH_BADARG; -} - -static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen, - Eterm type, Binary *bin, Eterm state_term, Eterm *res_term) -{ - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - - ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); - } - if (state_term != NIL) { - Eterm *ptr = big_val(state_term); - type = ptr[1]; - } - - if (type == am_bm) { - BMData *bm; - Sint pos; - Eterm ret,tpl; - Eterm *hp; - BMFindAllState state; - Uint reds = get_reds(p, BM_LOOP_FACTOR); - Uint save_reds = reds; - - bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_bm_data(bm); -#endif - if (state_term == NIL) { - bm_init_find_all(&state, hsstart, hslen); - } else { - Eterm *ptr = big_val(state_term); - bm_restore_find_all(&state,(char *) (ptr+2)); - } - - pos = bm_find_all_non_overlapping(&state, bm, bytes, &reds); - if (pos == BM_NOT_FOUND) { - ret = NIL; - } else if (pos == BM_RESTART) { - int x = (SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + - !!(SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap bm!\n"); -#endif - hp = HAlloc(p,x+2); - hp[0] = make_pos_bignum_header(x+1); - hp[1] = type; - bm_serialize_find_all(&state, (char *) (hp+2)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - bm_clean_find_all(&state); - return DO_BIN_MATCH_RESTART; - } else { - FindallData *fad = state.out; - int i; - for (i = 0; i < state.m; ++i) { - fad[i].epos = erts_make_integer(fad[i].pos,p); - fad[i].elen = erts_make_integer(fad[i].len,p); - } - hp = HAlloc(p,state.m * (3 + 2)); - ret = NIL; - for (i = state.m - 1; i >= 0; --i) { - tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); - hp +=3; - ret = CONS(hp,tpl,ret); - hp += 2; - } - } - erts_free_aligned_binary_bytes(temp_alloc); - bm_clean_find_all(&state); - BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; - } else if (type == am_ac) { - ACTrie *act; - int acr; - ACFindAllState state; - Eterm ret,tpl; - Eterm *hp; - Uint reds = get_reds(p, AC_LOOP_FACTOR); - Uint save_reds = reds; - - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_ac_trie(act); -#endif - if (state_term == NIL) { - ac_init_find_all(&state, act, hsstart, hslen); - } else { - Eterm *ptr = big_val(state_term); - ac_restore_find_all(&state,(char *) (ptr+2)); - } - acr = ac_find_all_non_overlapping(&state, bytes, &reds); - if (acr == AC_NOT_FOUND) { - ret = NIL; - } else if (acr == AC_RESTART) { - int x = (SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + - !!(SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap ac!\n"); -#endif - hp = HAlloc(p,x+2); - hp[0] = make_pos_bignum_header(x+1); - hp[1] = type; - ac_serialize_find_all(&state, (char *) (hp+2)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - ac_clean_find_all(&state); - return DO_BIN_MATCH_RESTART; - } else { - FindallData *fad = state.out; - int i; - for (i = 0; i < state.m; ++i) { - fad[i].epos = erts_make_integer(fad[i].pos,p); - fad[i].elen = erts_make_integer(fad[i].len,p); - } - hp = HAlloc(p,state.m * (3 + 2)); - ret = NIL; - for (i = state.m - 1; i >= 0; --i) { - tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); - hp +=3; - ret = CONS(hp,tpl,ret); - hp += 2; - } - } - erts_free_aligned_binary_bytes(temp_alloc); - ac_clean_find_all(&state); - BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; - } - badarg: - return DO_BIN_MATCH_BADARG; -} - -static BIF_RETTYPE binary_match_trap(BIF_ALIST_3) -{ - int runres; - Eterm result; - Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; - runres = do_binary_match(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result); - switch (runres) { - case DO_BIN_MATCH_OK: - BIF_RET(result); - case DO_BIN_MATCH_RESTART: - BUMP_ALL_REDS(BIF_P); - BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3); - default: - goto badarg; - } - badarg: - BIF_ERROR(BIF_P,BADARG); -} - -static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3) -{ - int runres; - Eterm result; - Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; - runres = do_binary_matches(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result); - switch (runres) { - case DO_BIN_MATCH_OK: - BIF_RET(result); - case DO_BIN_MATCH_RESTART: - BUMP_ALL_REDS(BIF_P); - BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3); - default: - goto badarg; - } - badarg: - BIF_ERROR(BIF_P,BADARG); -} - - -BIF_RETTYPE binary_match_3(BIF_ALIST_3) -{ - Uint hsstart, hslen; - Eterm *tp; - Eterm type; - Binary *bin; - Eterm bin_term = NIL; - int runres; - Eterm result; - - if (is_not_binary(BIF_ARG_1)) { - goto badarg; - } - if (BIF_ARG_3 == ((Eterm) 0)) { - /* Invalid term, we're called from binary_match_2... */ - hsstart = 0; - hslen = binary_size(BIF_ARG_1); - } else if (is_list(BIF_ARG_3)) { - Eterm l = BIF_ARG_3; - while(is_list(l)) { - Eterm t = CAR(list_val(l)); - if (!is_tuple(t)) { - goto badarg; - } - tp = tuple_val(t); - if (arityval(*tp) != 2) { - goto badarg; - } - if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { - goto badarg; - } - if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { - goto badarg; - } - if (hslen < hsstart) { - goto badarg; - } - if (hslen > binary_size(BIF_ARG_1)-1) { - goto badarg; /* XXX:PaN or should we take as much as we have ? */ - } - hslen = hslen + 1 - hsstart; - l = CDR(list_val(l)); - } - } else if (BIF_ARG_3 != NIL) { - goto badarg; - } - if (hslen == 0) { - BIF_RET(am_nomatch); - } - if (is_tuple(BIF_ARG_2)) { - tp = tuple_val(BIF_ARG_2); - if (arityval(*tp) != 2 || is_not_atom(tp[1])) { - goto badarg; - } - if (((tp[1] != am_bm) && (tp[1] != am_ac)) || - !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - goto badarg; - } - type = tp[1]; - bin = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { - goto badarg; - } - bin_term = tp[2]; - } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { - goto badarg; - } - runres = do_binary_match(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result); - if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { - Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE); - bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); - } else if (bin_term == NIL) { - erts_bin_free(bin); - } - switch (runres) { - case DO_BIN_MATCH_OK: - BIF_RET(result); - case DO_BIN_MATCH_RESTART: - BUMP_ALL_REDS(BIF_P); - BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, bin_term); - default: - goto badarg; - } - badarg: - BIF_ERROR(BIF_P,BADARG); -} - -BIF_RETTYPE binary_matches_3(BIF_ALIST_3) -{ - Uint hsstart, hslen; - Eterm *tp; - Eterm type; - Binary *bin; - Eterm bin_term = NIL; - int runres; - Eterm result; - - if (is_not_binary(BIF_ARG_1)) { - goto badarg; - } - if (BIF_ARG_3 == ((Eterm) 0)) { - /* Invalid term, we're called from binary_matches_2... */ - hsstart = 0; - hslen = binary_size(BIF_ARG_1); - } else if (is_list(BIF_ARG_3)) { - Eterm l = BIF_ARG_3; - while(is_list(l)) { - Eterm t = CAR(list_val(l)); - if (!is_tuple(t)) { - goto badarg; - } - tp = tuple_val(t); - if (arityval(*tp) != 2) { - goto badarg; - } - if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { - goto badarg; - } - if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { - goto badarg; - } - if (hslen < hsstart) { - goto badarg; - } - if (hslen > binary_size(BIF_ARG_1)-1) { - goto badarg; /* XXX:PaN or should we take as much as we have ? */ - } - hslen = hslen + 1 - hsstart; - l = CDR(list_val(l)); - } - } else if (BIF_ARG_3 != NIL) { - goto badarg; - } - if (hslen == 0) { - BIF_RET(am_nomatch); - } - if (is_tuple(BIF_ARG_2)) { - tp = tuple_val(BIF_ARG_2); - if (arityval(*tp) != 2 || is_not_atom(tp[1])) { - goto badarg; - } - if (((tp[1] != am_bm) && (tp[1] != am_ac)) || - !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - goto badarg; - } - type = tp[1]; - bin = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { - goto badarg; - } - bin_term = tp[2]; - } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { - goto badarg; - } - runres = do_binary_matches(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result); - if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { - Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE); - bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); - } else if (bin_term == NIL) { - erts_bin_free(bin); - } - switch (runres) { - case DO_BIN_MATCH_OK: - BIF_RET(result); - case DO_BIN_MATCH_RESTART: - BUMP_ALL_REDS(BIF_P); - BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, bin_term); - default: - goto badarg; - } - badarg: - BIF_ERROR(BIF_P,BADARG); -} - - -BIF_RETTYPE binary_match_2(BIF_ALIST_2) -{ - return binary_match_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); -} - - -BIF_RETTYPE binary_matches_2(BIF_ALIST_2) -{ - return binary_matches_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); -} - -/* - * Hard debug functions (dump) for the search structures - */ - -#ifdef HARDDEBUG -static void dump_bm_data(BMData *bm) -{ - int i,j; - erts_printf("Dumping Boyer-More structure.\n"); - erts_printf("=============================\n"); - erts_printf("Searchstring [%ld]:\n", bm->len); - erts_printf("<<"); - for (i = 0; i < bm->len; ++i) { - if (i > 0) { - erts_printf(", "); - } - erts_printf("%d", (int) bm->x[i]); - if (bm->x[i] >= 'A') { - erts_printf(" ($%c)",(char) bm->x[i]); - } - } - erts_printf(">>\n"); - erts_printf("GoodShift array:\n"); - for (i = 0; i < bm->len; ++i) { - erts_printf("GoodShift[%d]: %ld\n", i, bm->goodshift[i]); - } - erts_printf("BadShift array:\n"); - j = 0; - for (i = 0; i < ALPHABET_SIZE; i += j) { - for (j = 0; i + j < ALPHABET_SIZE && j < 6; ++j) { - erts_printf("BS[%03d]:%02ld, ", i+j, bm->badshift[i+j]); - } - erts_printf("\n"); - } -} - -static void dump_ac_node(ACNode *node, int indent, int ch) { - int i; - char *spaces = erts_alloc(ERTS_ALC_T_TMP, 10 * indent + 1); - memset(spaces,' ',10*indent); - spaces[10*indent] = '\0'; - erts_printf("%s-> %c\n",spaces,ch); - erts_printf("%sId: %u\n",spaces,(unsigned) node->id); - erts_printf("%sD: %u\n",spaces,(unsigned)node->d); - erts_printf("%sFinal: %d\n",spaces,(int)node->final); - erts_printf("%sFail: %u\n",spaces,(unsigned)node->h->id); - erts_free(ERTS_ALC_T_TMP,spaces); - for(i=0;ig[i] != NULL && node->g[i] != node) { - dump_ac_node(node->g[i],indent+1,i); - } - } -} - - -static void dump_ac_trie(ACTrie *act) -{ - erts_printf("Aho Corasick Trie dump.\n"); - erts_printf("=======================\n"); - erts_printf("Node counter: %u\n", (unsigned) act->idc); - erts_printf("Searchstring counter: %u\n", (unsigned) act->counter); - erts_printf("Trie:\n"); - dump_ac_node(act->root, 0, '0'); - return; -} -#endif -- cgit v1.2.3 From 9d2fe9d9af19ab94ff3feb1e7b9ffd83fa6927ff Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 23 Apr 2010 18:53:51 +0200 Subject: Add binary:longest_common_prefix/longest_common_suffix Add allcoator parameter to erts_get_aligned_binary_bytes_extra. Add testcases for the functions above. Add reference implementation for the functions above. --- erts/emulator/beam/binary.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'erts/emulator/beam/binary.c') diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 7ca3eb686d..9c4076c8ff 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -180,7 +180,7 @@ erts_realloc_binary(Eterm bin, size_t size) } byte* -erts_get_aligned_binary_bytes_extra(Eterm bin, byte** base_ptr, unsigned extra) +erts_get_aligned_binary_bytes_extra(Eterm bin, byte** base_ptr, ErtsAlcType_t allocator, unsigned extra) { byte* bytes; Eterm* real_bin; @@ -208,7 +208,7 @@ erts_get_aligned_binary_bytes_extra(Eterm bin, byte** base_ptr, unsigned extra) bytes = (byte *)(&(((ErlHeapBin *) real_bin)->data)) + offs; } if (bit_offs) { - byte* buf = (byte *) erts_alloc(ERTS_ALC_T_TMP, byte_size + extra); + byte* buf = (byte *) erts_alloc(allocator, byte_size + extra); *base_ptr = buf; buf += extra; erts_copy_bits(bytes, bit_offs, 1, buf, 0, 1, byte_size*8); -- cgit v1.2.3 From 1dad48ee9f2e1aba6a0ec69d9cf688705d6f187c Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 29 Apr 2010 20:06:55 +0200 Subject: Add binary:list_to_bin/1 and binary:copy/1,2 Add testcases for binary:list_to_bin/1 and binary:copy/1,2. Add reference implementation of list_to_bin/1. --- erts/emulator/beam/binary.c | 52 ++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 31 deletions(-) (limited to 'erts/emulator/beam/binary.c') diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 9c4076c8ff..c68392fad4 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -346,29 +346,40 @@ BIF_RETTYPE bitstring_to_list_1(BIF_ALIST_1) /* Turn a possibly deep list of ints (and binaries) into */ /* One large binary object */ -BIF_RETTYPE list_to_binary_1(BIF_ALIST_1) +/* + * This bif also exists in the binary module, under the name + * binary:list_to_bin/1, why it's divided into interface and + * implementation. Also the backend for iolist_to_binary_1. + */ + +BIF_RETTYPE erts_list_to_binary_bif(Process *p, Eterm arg) { Eterm bin; int i; int offset; byte* bytes; - if (is_nil(BIF_ARG_1)) { - BIF_RET(new_binary(BIF_P,(byte*)"",0)); + if (is_nil(arg)) { + BIF_RET(new_binary(p,(byte*)"",0)); } - if (is_not_list(BIF_ARG_1)) { + if (is_not_list(arg)) { goto error; } - if ((i = io_list_len(BIF_ARG_1)) < 0) { + if ((i = io_list_len(arg)) < 0) { goto error; } - bin = new_binary(BIF_P, (byte *)NULL, i); + bin = new_binary(p, (byte *)NULL, i); bytes = binary_bytes(bin); - offset = io_list_to_buf(BIF_ARG_1, (char*) bytes, i); + offset = io_list_to_buf(arg, (char*) bytes, i); ASSERT(offset == 0); BIF_RET(bin); - error: - BIF_ERROR(BIF_P, BADARG); + error: + BIF_ERROR(p, BADARG); +} + +BIF_RETTYPE list_to_binary_1(BIF_ALIST_1) +{ + return erts_list_to_binary_bif(BIF_P, BIF_ARG_1); } /* Turn a possibly deep list of ints (and binaries) into */ @@ -376,31 +387,10 @@ BIF_RETTYPE list_to_binary_1(BIF_ALIST_1) BIF_RETTYPE iolist_to_binary_1(BIF_ALIST_1) { - Eterm bin; - int i; - int offset; - byte* bytes; - if (is_binary(BIF_ARG_1)) { BIF_RET(BIF_ARG_1); } - if (is_nil(BIF_ARG_1)) { - BIF_RET(new_binary(BIF_P,(byte*)"",0)); - } - if (is_not_list(BIF_ARG_1)) { - goto error; - } - if ((i = io_list_len(BIF_ARG_1)) < 0) { - goto error; - } - bin = new_binary(BIF_P, (byte *)NULL, i); - bytes = binary_bytes(bin); - offset = io_list_to_buf(BIF_ARG_1, (char*) bytes, i); - ASSERT(offset == 0); - BIF_RET(bin); - - error: - BIF_ERROR(BIF_P, BADARG); + return erts_list_to_binary_bif(BIF_P, BIF_ARG_1); } BIF_RETTYPE list_to_bitstring_1(BIF_ALIST_1) -- cgit v1.2.3