From fdc8980231b1e791ec4b8f8f3d61a7ba7dda539b Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 2 Nov 2009 15:31:21 +0100 Subject: Initial commit of the binary EEP --- erts/emulator/beam/atom.names | 2 + erts/emulator/beam/bif.tab | 34 ++ erts/emulator/beam/binary.c | 976 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1012 insertions(+) (limited to 'erts') diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 9ce21089ba..e63d56b32a 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -65,6 +65,7 @@ atom EXIT='EXIT' atom aborted atom abs_path atom absoluteURI +atom ac atom active atom all atom all_but_first @@ -102,6 +103,7 @@ atom bif_return_trap atom binary atom block atom blocked +atom bm atom bnot atom bor atom bxor diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index b6fa06354a..8d81aa7eba 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -754,6 +754,40 @@ bif erlang:load_nif/2 bif erlang:call_on_load_function/1 bif erlang:finish_after_on_load/2 +# The binary match bifs (New in R13B04 - EEP9) +# + +# +# The searching/splitting/substituting thingies +# +bif binary:match_compile/1 +bif binary:match/2 +bif binary:match/3 +bif binary:matches/2 +bif binary:matches/3 +# bif binary:split/2 +# bif binary:split/3 +# bif binary:substitute/3 +# bif binary:globally_substitute/3 +# bif binary:duplicate/2 + +# +# XXX:PaN Usecase for these two? Creeping Biffilism? +# +# bif binary:from_unsigned/1 +# bif binary:to_unsigned/1 + +# +# XXX:PaN The following are suggested to be implemented in the erlang code... +# - or are they meant to be guard bif's? 
+# +# binary:first/1 +# binary:first/2 +# binary:last/1 +# binary:last/2 +# binary:nth/2 +# binary:extract/3 + # # New Bifs in R13B4 # diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 59c20398d5..29c1af2114 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -675,3 +675,979 @@ bitstr_list_len(Eterm obj) DESTROY_ESTACK(s); return (Sint) -1; } + +#define HARDDEBUG + +/* + * The native implementation functions for the module binary. + * Searching is implemented using aither Boyer-More or Aho-Corasick + * depending on number of searchstrings (BM if one, AC if more than one). + * Native implementation is for efficiency, nothing really *needs* to be + * implemented in native code. + */ + +/* + * A micro allocator used when building search structures, just a convenience + * for building structures inside a pre alocated magic binary using conventional + * malloc-like interface. + */ + +#define MYALIGN(Size) (SIZEOF_VOID_P * (((Size) / SIZEOF_VOID_P) + \ + !!(((Size) % SIZEOF_VOID_P)))) + +#ifdef DEBUG +#define CHECK_ALLOCATOR(My) ASSERT((My).current <= ((My).mem + (My).size)) +#else +#define CHECK_ALLOCATOR(My) /* nothing */ +#endif + +typedef struct _my_allocator { + Uint size; + byte *current; + byte *mem; +} MyAllocator; + +static void init_my_allocator(MyAllocator *my, Uint siz, byte *array) +{ + ASSERT((siz % SIZEOF_VOID_P) == 0); + my->size = siz; + my->mem = array; + my->current = my->mem; +} + +static void *my_alloc(MyAllocator *my, Uint size) +{ + void *ptr = my->current; + my->current += MYALIGN(size); + return ptr; +} + +/* + * The search functionality. 
+ * + * The search is byte oriented, which works nicely for UTF-8 as well as latin1 data + */ + +#define ALPHABET_SIZE 256 + +typedef struct _ac_node { +#ifdef HARDDEBUG + Uint32 id; /* To identify h pointer targets when dumping */ +#endif + Uint32 d; /* Depth in trie, also represents the length + (-1) of the matched string if in + final set */ + Sint32 final; /* Members in final set represent matches. + * The set representation is scattered + * among the nodes in this way: + * >0 -> this represents a member of + * the final set, <0 -> member of + * final set somewhere in the failure chain, + * 0 -> not member of the final set */ + struct _ac_node *h; /* h(Hode) is the failure function */ + struct _ac_node *g[ALPHABET_SIZE]; /* g(Node,Character) is the + transition function */ +} ACNode; + +typedef struct _ac_trie { +#ifdef HARDDEBUG + Uint32 idc; +#endif + Uint32 counter; /* Number of added patterns */ + ACNode *root; /* pointer to the root state */ +} ACTrie; + +typedef struct _bm_data { + int ret_tuple; + byte *x; + Sint len; + Sint *goodshift; + Sint badshift[ALPHABET_SIZE]; +} BMData; + +#ifdef HARDDEBUG +static void dump_bm_data(BMData *bm); +static void dump_ac_trie(ACTrie *act); +static void dump_ac_node(ACNode *node, int indent, int ch); +#endif + +/* + * The needed size of binary data for a search structure - given the accumulated + * string lengths. + */ +#define BM_SIZE(StrLen) /* StrLen: length of searchstring */ \ +((MYALIGN(sizeof(Sint) * (StrLen))) + /* goodshift array */ \ + MYALIGN(StrLen) + /* searchstring saved */ \ + (MYALIGN(sizeof(BMData)))) /* Structure */ + +#define AC_SIZE(StrLens) /* StrLens: sum of all searchstring lengths */ \ +((MYALIGN(sizeof(ACNode)) * \ +((StrLens)+1)) + /* The actual nodes (including rootnode) */ \ + MYALIGN(sizeof(ACTrie))) /* Structure */ + + +#ifndef MAX +#define MAX(A,B) (((A) > (B)) ? 
(A) : B) +#endif + +/* + * Callback for the magic binary + */ +static void cleanup_my_data(Binary *bp) +{ + return; +} + +/* + * Initiate a (allocated) micro allocator and fill in the base + * for an Aho-Corasick search trie, given the accumulated length of the search strings. + */ +static ACTrie *create_acdata(MyAllocator *my, Process *p, Uint len, + ACNode ***qbuff /* out */,Eterm *the_bin /* out */) +{ + Uint datasize = AC_SIZE(len); + ACTrie *act; + ACNode *acn; + Eterm *hp; + Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); + byte *data = ERTS_MAGIC_BIN_DATA(mb); + + init_my_allocator(my, datasize, data); + act = my_alloc(my, sizeof(ACTrie)); /* Important that this is the first + allocation */ + act->counter = 0; + act->root = acn = my_alloc(my, sizeof(ACNode)); + acn->d = 0; + acn->final = 0; + acn->h = NULL; + memset(acn->g, 0, sizeof(ACNode *) * ALPHABET_SIZE); +#ifdef HARDDEBUG + act->idc = 0; + acn->id = 0; +#endif + *qbuff = erts_alloc(ERTS_ALC_T_TMP, sizeof(ACNode *) * len); + hp = HAlloc(p, PROC_BIN_SIZE); + *the_bin = erts_mk_magic_binary_term(&hp, &MSO(p), mb); + return act; +} + +/* + * The same initialization of allocator and basic data for Boyer-More. + */ +static BMData *create_bmdata(MyAllocator *my, Process *p, byte *x, Uint len, Eterm *the_bin /* out */) +{ + Uint datasize = BM_SIZE(len); + BMData *bmd; + Eterm *hp; + Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); + byte *data = ERTS_MAGIC_BIN_DATA(mb); + init_my_allocator(my, datasize, data); + bmd = my_alloc(my, sizeof(BMData)); + bmd->x = my_alloc(my,len); + memcpy(bmd->x,x,len); + bmd->len = len; + bmd->goodshift = my_alloc(my,sizeof(Uint) * len); + bmd->ret_tuple = 0; + hp = HAlloc(p, PROC_BIN_SIZE); + *the_bin = erts_mk_magic_binary_term(&hp, &MSO(p), mb); + return bmd; +} + +/* + * Compilation of search structures + */ + +/* + * Aho Corasick - Build a Trie and fill in the failure functions + * when all strings are added. 
+ * The algorithm is nicely described by Dieter Bühler of University of Tübingen: + * http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html + */ + +/* + * Helper called ance for each search pattern + */ +static void ac_add_one_pattern(MyAllocator *my, ACTrie *act, byte *x, Uint len) +{ + ACNode *acn = act->root; + Uint32 n = ++act->counter; /* Always increase conter, even if it's a duplicate + as this shall identify the pattern in the + final set and eventually be returned to + the caller (in Erlang) */ + Uint i = 0; + + while(i < len) { + if (acn->g[x[i]] != NULL) { + /* node exists, continue */ + acn = acn->g[x[i]]; + ++i; + } else { + /* allocate a new node */ + ACNode *nn = my_alloc(my,sizeof(ACNode)); +#ifdef HARDDEBUG + nn->id = ++(act->idc); +#endif + nn->d = i+1; + nn->h = act->root; + nn->final = 0; + memset(nn->g, 0, sizeof(ACNode *) * ALPHABET_SIZE); + acn->g[x[i]] = nn; + ++i; + acn = nn; + } + } + if (acn->final == 0) { /* New pattern, add to final set */ + acn->final = n; + } +} + +/* + * Called when all search patterns are added. + */ +static void ac_compute_failure_functions(ACTrie *act, ACNode **qbuff) +{ + ACNode *root = act->root; + ACNode *parent; + int i; + int qh = 0,qt = 0; + ACNode *child, *r; + + /* Set all children of the root to have the root as failure function */ + for (i = 0; i < ALPHABET_SIZE; ++i) { + if (root->g[i] != NULL) { + root->g[i]->h = root; + /* Add to que for later traversal */ + qbuff[qt++] = root->g[i]; + } + } + + /* So, now we've handled children of the root state, traverse the + rest of the trie BF... 
*/ + while (qh < qt) { + parent = qbuff[qh++]; + for (i = 0; i < ALPHABET_SIZE; ++ i) { + if ((child = parent->g[i]) != NULL) { + /* Visit this node to */ + qbuff[qt++] = child; + /* Search for correct failure function, follow the parents failure + function until you find a similar transition funtion to this + childs */ + r = parent->h; + while (r != NULL && r->g[i] == NULL) { + r = r->h; + } + if (r == NULL) { + /* Replace NULL failures with the root as we go */ + child->h = (root->g[i] == NULL) ? root : root->g[i]; + } else { + child->h = r->g[i]; + /* + * The "final" set is scattered among the nodes. When + * the failure function points to a member of the final set, + * we have a match, but we might not see it in the current node + * if we dont mark it as a special type of final, i.e. foolow + * the failure function and you will find a real member of final + * set. This is marked with a negative string id and only done if + * this node does not represent a member in the final set. + */ + if (!(child->final) && (child->h->final)) { + child->final = -1; + } + } + } + } + } + /* Finally the failure function of the root should point to itself */ + root->h = root; +} + +/* + * The actual searching for needles in the haystack... + * Find first match using Aho-Coracick Trie + * return pattern number and fill in mpos + mlen if found, otherwise return 0 + * Return the matching pattern that *starts* first, not ends + * first (difference when overlapping), hence the candidate thing. + * Basic AC finds the first end before the first start... 
+ * + */ +static Uint ac_find_first_match(ACTrie *act, byte *haystack, Uint len, + Uint *mpos, Uint *mlen) +{ + ACNode *q = act->root; + Uint i = 0; + ACNode *candidate = NULL, *r; + Uint candidate_start = 0 /* Init not needed, just quiet the compiler */; + Uint rstart; + + while (i < len) { + while (q->g[haystack[i]] == NULL && q->h != q) { + q = q->h; + } + if (q->g[haystack[i]] != NULL) { + q = q->g[haystack[i]]; + } +#ifdef HARDDEBUG + erts_printf("ch = %c, Current: %u\n", (int) haystack[i], (unsigned) q->id); +#endif + ++i; + if (candidate != NULL && (i - q->d) > candidate_start) { + break; + } + if (q->final) { + r = q; + while (r->final < 0) + r = r->h; + rstart = i - r->d; + if (candidate == NULL || rstart < candidate_start) { + candidate_start = rstart; + candidate = r; + } + } + } + if (!candidate) { + return 0; + } +#ifdef HARDDEBUG + dump_ac_node(candidate,0,'?'); +#endif + *mpos = candidate_start; + *mlen = candidate->d; + return candidate->final; +} + +typedef struct _findall_data { + Uint pos; + Uint len; +#ifdef HARDDEBUG + Uint id; +#endif + Eterm epos; + Eterm elen; +#if 0 + Eterm eid; +#endif +} FindallData; +/* + * Returns number of non overlapping matches + */ +static Uint ac_find_all_non_overlapping(ACTrie *act, byte *haystack, Uint len, + FindallData **data) +{ + ACNode *q = act->root; + Uint i = 0; + Uint rstart; + ACNode *r; + Uint m = 0, save_m; + Uint allocated = 0; + FindallData *out = NULL; + + + while (i < len) { + while (q->g[haystack[i]] == NULL && q->h != q) { + q = q->h; + } + if (q->g[haystack[i]] != NULL) { + q = q->g[haystack[i]]; + } +#ifdef HARDDEBUG + erts_printf("ch = %c, Current: %u\n", (int) haystack[i], (unsigned) q->id); +#endif + ++i; + if (q->final) { + r = q; + while (r->final) { + while (r->final < 0) + r = r->h; +#ifdef HARDDEBUG + erts_printf("Trying to add %u\n",(unsigned) r->final); +#endif + rstart = i - r->d; + save_m = m; + while (m > 0 && out[m-1].pos > rstart) { +#ifdef HARDDEBUG + erts_printf("Popping 
%u\n",(unsigned) out[m-1].id); +#endif + --m; + } + if (m == 0 || out[m-1].pos + out[m-1].len <= rstart) { + if (m >= allocated) { + if (!allocated) { + allocated = 10; + out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * allocated); + } else { + allocated *= 2; + out = erts_realloc(ERTS_ALC_T_TMP, out, + sizeof(FindallData) * allocated); + } + } + out[m].pos = rstart; + out[m].len = r->d; +#ifdef HARDDEBUG + out[m].id = r->final; +#endif + ++m; +#ifdef HARDDEBUG + erts_printf("Pushing %u\n",(unsigned) out[m-1].id); +#endif + } else { +#ifdef HARDDEBUG + erts_printf("Backtracking %d steps\n",save_m - m); +#endif + m = save_m; + } + r = r->h; + } + } + } + *data = out; + return m; +} + +/* + * Boyer More - most obviously implemented more or less exactly as Christian Charras + * and Thierry Lecroq describes it in "Handbook of Exact String-Matching Algorithms" + * http://www-igm.univ-mlv.fr/~lecroq/string/ + */ + +/* + * Call this to compute badshifts array + */ +static void compute_badshifts(BMData *bmd) +{ + Sint i; + Sint m = bmd->len; + + for (i = 0; i < ALPHABET_SIZE; ++i) { + bmd->badshift[i] = m; + } + for (i = 0; i < m - 1; ++i) { + bmd->badshift[bmd->x[i]] = m - i - 1; + } +} + +/* Helper for "compute_goodshifts" */ +static void compute_suffixes(byte *x, Sint m, Sint *suffixes) +{ + int f,g,i; + + suffixes[m - 1] = m; + + f = 0; /* To avoid use before set warning */ + + g = m - 1; + + for (i = m - 2; i >= 0; --i) { + if (i > g && suffixes[i + m - f] < i - g) { + suffixes[i] = suffixes[i + m - 1 - f]; + } else { + if (i < g) { + g = i; + } + f = i; + while ( g >= 0 && x[g] == x[g + m - 1 - f] ) { + --g; + } + suffixes[i] = f - g; + } + } +} + +/* + * Call this to compute goodshift array + */ +static void compute_goodshifts(BMData *bmd) +{ + Sint m = bmd->len; + byte *x = bmd->x; + Sint i, j; + Sint *suffixes = erts_alloc(ERTS_ALC_T_TMP, m * sizeof(Uint)); + + compute_suffixes(x, m, suffixes); + + for (i = 0; i < m; ++i) { + bmd->goodshift[i] = m; + } + + j 
= 0; + + for (i = m - 1; i >= -1; --i) { + if (i == -1 || suffixes[i] == i + 1) { + while (j < m - 1 - i) { + if (bmd->goodshift[j] == m) { + bmd->goodshift[j] = m - 1 - i; + } + ++j; + } + } + } + for (i = 0; i <= m - 2; ++i) { + bmd->goodshift[m - 1 - suffixes[i]] = m - 1 - i; + } + erts_free(ERTS_ALC_T_TMP, suffixes); +} + +static Sint bm_find_first_match(BMData *bmd, byte *haystack, Uint len) +{ + Sint blen = bmd->len; + Sint *gs = bmd->goodshift; + Sint *bs = bmd->badshift; + byte *needle = bmd->x; + Sint i; + Sint j = 0; + + while (j <= len - blen) { + for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) + ; + if (i < 0) { /* found */ + return j; + } + j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); + } + return -1; +} + +/* + * Interface functions (i.e. "bif's") + */ + +/* + * Search functionality interfaces + */ +BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) +{ + Eterm t, b, comp_term = NIL; + Uint characters; + Uint words; + int return_tuple = 0; + + characters = 0; + words = 0; + + if (is_list(BIF_ARG_1)) { + return_tuple = 1; + t = BIF_ARG_1; + while (is_list(t)) { + b = CAR(list_val(t)); + t = CDR(list_val(t)); + if (!is_binary(b)) { + goto badarg; + } + if (binary_bitsize(b) != 0) { + goto badarg; + } + ++words; + characters += binary_size(b); + } + if (is_not_nil(t)) { + goto badarg; + } + if (words > 1) { + comp_term = BIF_ARG_1; + } else { + comp_term = CAR(list_val(BIF_ARG_1)); + } + } else if (is_binary(BIF_ARG_1)) { + if (binary_bitsize(BIF_ARG_1) != 0) { + goto badarg; + } + words = 1; + comp_term = BIF_ARG_1; + characters = binary_size(BIF_ARG_1); + } + + if (characters == 0) { + goto badarg; + } + ASSERT(words > 0); + + if (words == 1) { + Eterm ret; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + MyAllocator my; + BMData *bmd; + Eterm *hp; + + ERTS_GET_BINARY_BYTES(comp_term, bytes, bitoffs, bitsize); + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(comp_term, &temp_alloc); + } + bmd = 
create_bmdata(&my, BIF_P, bytes, characters, &ret); + bmd->ret_tuple = return_tuple; + compute_badshifts(bmd); + compute_goodshifts(bmd); + erts_free_aligned_binary_bytes(temp_alloc); + CHECK_ALLOCATOR(my); + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, am_bm, ret); + BIF_RET(ret); + } else { + Eterm ret; + ACTrie *act; + MyAllocator my; + Eterm *hp; + ACNode **qbuff; + + act = create_acdata(&my, BIF_P, characters, &qbuff, &ret); + t = comp_term; + while (is_list(t)) { + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + b = CAR(list_val(t)); + t = CDR(list_val(t)); + ERTS_GET_BINARY_BYTES(b, bytes, bitoffs, bitsize); + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(b, &temp_alloc); + } + ac_add_one_pattern(&my,act,bytes,binary_size(b)); + erts_free_aligned_binary_bytes(temp_alloc); + } + ac_compute_failure_functions(act,qbuff); + CHECK_ALLOCATOR(my); + erts_free(ERTS_ALC_T_TMP,qbuff); + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, am_ac, ret); + BIF_RET(ret); + } + badarg: + BIF_ERROR(BIF_P,BADARG); +} + +BIF_RETTYPE binary_match_3(BIF_ALIST_3) +{ + Uint hsstart, hslen; + Eterm *tp; + if (is_not_binary(BIF_ARG_1)) { + goto badarg; + } + if (BIF_ARG_3 == ((Eterm) 0)) { + /* Invalid term, we're called from binary_match_2... */ + hsstart = 0; + hslen = binary_size(BIF_ARG_1); + } else if (is_tuple(BIF_ARG_3)) { + tp = tuple_val(BIF_ARG_3); + if (arityval(*tp) != 2) { + goto badarg; + } + if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { + goto badarg; + } + if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { + goto badarg; + } + if (hslen < hsstart) { + goto badarg; + } + if (hslen > binary_size(BIF_ARG_1)-1) { + goto badarg; /* XXX:PaN or should we take as much as we have ? 
*/ + } + hslen = hslen + 1 - hsstart; + } else { + goto badarg; + } + if (hslen == 0) { + BIF_RET(am_nomatch); + } + if (is_tuple(BIF_ARG_2)) { + tp = tuple_val(BIF_ARG_2); + if (arityval(*tp) != 2 || is_not_atom(tp[1])) { + goto badarg; + } + if (tp[1] == am_bm && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + Binary *mbp; + BMData *bm; + Sint pos; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; + mbp = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { + goto badarg; + } + bm = (BMData *) ERTS_MAGIC_BIN_DATA(mbp); +#ifdef HARDDEBUG + dump_bm_data(bm); +#endif + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + pos = bm_find_first_match(bm, bytes + hsstart, hslen); + if (pos < 0) { + ret = am_nomatch; + } else { + Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); + ret = erts_make_integer(pos+hsstart,BIF_P); + if (bm->ret_tuple) { + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, ret, erlen); + } + } + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(ret); + } else if (tp[1] == am_ac && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + Binary *mbp; + ACTrie *act; + Uint pos, msn,rlen; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; + + mbp = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { + goto badarg; + } + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(mbp); +#ifdef HARDDEBUG + dump_ac_trie(act); +#endif + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + msn = ac_find_first_match(act, bytes + hsstart, + hslen, &pos, &rlen); + if (msn == 0) { + ret = am_nomatch; + } else { + Eterm epos = erts_make_integer(pos+hsstart,BIF_P); 
+ Eterm erlen = erts_make_integer(rlen,BIF_P); + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, epos, erlen); + } + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(ret); + } else { + goto badarg; + } + } else { + goto badarg; /* Compilation on the fly NYI */ + } + badarg: + BIF_ERROR(BIF_P,BADARG); +} +BIF_RETTYPE binary_match_2(BIF_ALIST_2) +{ + return binary_match_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); +} + +BIF_RETTYPE binary_matches_3(BIF_ALIST_3) +{ + Uint hsstart, hslen; + Eterm *tp; + if (is_not_binary(BIF_ARG_1)) { + goto badarg; + } + if (BIF_ARG_3 == ((Eterm) 0)) { + /* Invalid term, we're called from binary_match_2... */ + hsstart = 0; + hslen = binary_size(BIF_ARG_1); + } else if (is_tuple(BIF_ARG_3)) { + tp = tuple_val(BIF_ARG_3); + if (arityval(*tp) != 2) { + goto badarg; + } + if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { + goto badarg; + } + if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { + goto badarg; + } + if (hslen < hsstart) { + goto badarg; + } + if (hslen > binary_size(BIF_ARG_1)-1) { + goto badarg; /* XXX:PaN or should we take as much as we have ? 
*/ + } + hslen = hslen + 1 - hsstart; + } else { + goto badarg; + } + if (hslen == 0) { + BIF_RET(am_nomatch); + } + if (is_tuple(BIF_ARG_2)) { + tp = tuple_val(BIF_ARG_2); + if (arityval(*tp) != 2 || is_not_atom(tp[1])) { + goto badarg; + } + if (tp[1] == am_bm && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + Binary *mbp; + BMData *bm; + Sint pos; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; + mbp = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { + goto badarg; + } + bm = (BMData *) ERTS_MAGIC_BIN_DATA(mbp); +#ifdef HARDDEBUG + dump_bm_data(bm); +#endif + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + pos = bm_find_first_match(bm, bytes + hsstart, hslen); + if (pos < 0) { + ret = am_nomatch; + } else { + Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); + ret = erts_make_integer(pos,BIF_P); + if (bm->ret_tuple) { + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, ret, erlen); + } + } + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(ret); + } else if (tp[1] == am_ac && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + Binary *mbp; + ACTrie *act; + Uint rlen; + Sint i; + FindallData *fad; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret,tpl; + Eterm *hp; + + mbp = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { + goto badarg; + } + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(mbp); +#ifdef HARDDEBUG + dump_ac_trie(act); +#endif + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + rlen = ac_find_all_non_overlapping(act, bytes + hsstart, + hslen, &fad); + if (rlen == 0) { + ret = am_nomatch; + } else { + for (i = 0; i < rlen; ++i) { + 
fad[i].epos = erts_make_integer(fad[i].pos,BIF_P); + fad[i].elen = erts_make_integer(fad[i].len,BIF_P); + } + hp = HAlloc(BIF_P,rlen * (3 + 2)); + ret = NIL; + for (i = rlen - 1; i >= 0; --i) { + tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); + hp +=3; + ret = CONS(hp,tpl,ret); + hp += 2; + } + } + erts_free_aligned_binary_bytes(temp_alloc); + if (fad != NULL) { + erts_free(ERTS_ALC_T_TMP,fad); + } + BIF_RET(ret); + } else { + goto badarg; + } + } else { + goto badarg; /* Compilation on the fly NYI */ + } + badarg: + BIF_ERROR(BIF_P,BADARG); +} +BIF_RETTYPE binary_matches_2(BIF_ALIST_2) +{ + return binary_matches_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); +} + +/* + * Hard debug functions (dump) for the search structures + */ + +#ifdef HARDDEBUG +static void dump_bm_data(BMData *bm) +{ + int i,j; + erts_printf("Dumping Boyer-More structure.\n"); + erts_printf("=============================\n"); + erts_printf("Return tuple: %d\n",bm->ret_tuple); + erts_printf("Searchstring [%ld]:\n", bm->len); + erts_printf("<<"); + for (i = 0; i < bm->len; ++i) { + if (i > 0) { + erts_printf(", "); + } + erts_printf("%d", (int) bm->x[i]); + if (bm->x[i] >= 'A') { + erts_printf(" ($%c)",(char) bm->x[i]); + } + } + erts_printf(">>\n"); + erts_printf("GoodShift array:\n"); + for (i = 0; i < bm->len; ++i) { + erts_printf("GoodShift[%d]: %ld\n", i, bm->goodshift[i]); + } + erts_printf("BadShift array:\n"); + j = 0; + for (i = 0; i < ALPHABET_SIZE; i += j) { + for (j = 0; i + j < ALPHABET_SIZE && j < 6; ++j) { + erts_printf("BS[%03d]:%02ld, ", i+j, bm->badshift[i+j]); + } + erts_printf("\n"); + } +} + +static void dump_ac_node(ACNode *node, int indent, int ch) { + int i; + char *spaces = erts_alloc(ERTS_ALC_T_TMP, 10 * indent + 1); + memset(spaces,' ',10*indent); + spaces[10*indent] = '\0'; + erts_printf("%s-> %c\n",spaces,ch); + erts_printf("%sId: %u\n",spaces,(unsigned) node->id); + erts_printf("%sD: %u\n",spaces,(unsigned)node->d); + erts_printf("%sFinal: %d\n",spaces,(int)node->final); + 
erts_printf("%sFail: %u\n",spaces,(unsigned)node->h->id); + erts_free(ERTS_ALC_T_TMP,spaces); + for(i=0;i<ALPHABET_SIZE;++i) { + if (node->g[i] != NULL && node->g[i] != node) { + dump_ac_node(node->g[i],indent+1,i); + } + } +} + + +static void dump_ac_trie(ACTrie *act) +{ + erts_printf("Aho Corasick Trie dump.\n"); + erts_printf("=======================\n"); + erts_printf("Node counter: %u\n", (unsigned) act->idc); + erts_printf("Searchstring counter: %u\n", (unsigned) act->counter); + erts_printf("Trie:\n"); + dump_ac_node(act->root, 0, '0'); + return; +} +#endif -- cgit v1.2.3 From e0c4c2867c20368c5b5d88cbbf92da7b7a3f386e Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 2 Nov 2009 15:55:20 +0100 Subject: Teach binary.c the semantics to take longest instead of shortest match Add testcase embryos and reference implementation. Change name of compile function according to EEP31. --- erts/emulator/beam/bif.tab | 2 +- erts/emulator/beam/binary.c | 411 ++++++++++++++++++++++++-------------------- 2 files changed, 225 insertions(+), 188 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 8d81aa7eba..3f51e6dc45 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -760,7 +760,7 @@ bif erlang:finish_after_on_load/2 # # The searching/splitting/substituting thingies # -bif binary:match_compile/1 +bif binary:compile_pattern/1 bif binary:match/2 bif binary:match/3 bif binary:matches/2 diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 29c1af2114..4662e60d51 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -802,13 +802,12 @@ static void cleanup_my_data(Binary *bp) * Initiate a (allocated) micro allocator and fill in the base * for an Aho-Corasick search trie, given the accumulated length of the search strings. 
*/ -static ACTrie *create_acdata(MyAllocator *my, Process *p, Uint len, - ACNode ***qbuff /* out */,Eterm *the_bin /* out */) +static ACTrie *create_acdata(MyAllocator *my, Uint len, + ACNode ***qbuff /* out */,Binary **the_bin /* out */) { Uint datasize = AC_SIZE(len); ACTrie *act; ACNode *acn; - Eterm *hp; Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); byte *data = ERTS_MAGIC_BIN_DATA(mb); @@ -826,19 +825,17 @@ static ACTrie *create_acdata(MyAllocator *my, Process *p, Uint len, acn->id = 0; #endif *qbuff = erts_alloc(ERTS_ALC_T_TMP, sizeof(ACNode *) * len); - hp = HAlloc(p, PROC_BIN_SIZE); - *the_bin = erts_mk_magic_binary_term(&hp, &MSO(p), mb); + *the_bin = mb; return act; } /* * The same initialization of allocator and basic data for Boyer-More. */ -static BMData *create_bmdata(MyAllocator *my, Process *p, byte *x, Uint len, Eterm *the_bin /* out */) +static BMData *create_bmdata(MyAllocator *my, byte *x, Uint len, Binary **the_bin /* out */) { Uint datasize = BM_SIZE(len); BMData *bmd; - Eterm *hp; Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); byte *data = ERTS_MAGIC_BIN_DATA(mb); init_my_allocator(my, datasize, data); @@ -848,8 +845,7 @@ static BMData *create_bmdata(MyAllocator *my, Process *p, byte *x, Uint len, Ete bmd->len = len; bmd->goodshift = my_alloc(my,sizeof(Uint) * len); bmd->ret_tuple = 0; - hp = HAlloc(p, PROC_BIN_SIZE); - *the_bin = erts_mk_magic_binary_term(&hp, &MSO(p), mb); + *the_bin = mb; return bmd; } @@ -998,7 +994,8 @@ static Uint ac_find_first_match(ACTrie *act, byte *haystack, Uint len, while (r->final < 0) r = r->h; rstart = i - r->d; - if (candidate == NULL || rstart < candidate_start) { + if (candidate == NULL || rstart < candidate_start || + (rstart == candidate_start && candidate->d < q->d)) { candidate_start = rstart; candidate = r; } @@ -1063,12 +1060,21 @@ static Uint ac_find_all_non_overlapping(ACTrie *act, byte *haystack, Uint len, #endif rstart = i - r->d; save_m = m; - while (m > 0 && 
out[m-1].pos > rstart) { + while (m > 0 && (out[m-1].pos > rstart || + (out[m-1].pos == rstart && + out[m-1].len < r->d))) { #ifdef HARDDEBUG erts_printf("Popping %u\n",(unsigned) out[m-1].id); #endif --m; } +#ifdef HARDDEBUG + if (m > 0) { + erts_printf("Pos %u\n",out[m-1].pos); + erts_printf("Len %u\n",out[m-1].len); + } + erts_printf("Rstart %u\n",rstart); +#endif if (m == 0 || out[m-1].pos + out[m-1].len <= rstart) { if (m >= allocated) { if (!allocated) { @@ -1213,7 +1219,8 @@ static Sint bm_find_first_match(BMData *bmd, byte *haystack, Uint len) /* * Search functionality interfaces */ -BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) + +static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp) { Eterm t, b, comp_term = NIL; Uint characters; @@ -1223,9 +1230,9 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) characters = 0; words = 0; - if (is_list(BIF_ARG_1)) { + if (is_list(argument)) { return_tuple = 1; - t = BIF_ARG_1; + t = argument; while (is_list(t)) { b = CAR(list_val(t)); t = CDR(list_val(t)); @@ -1242,17 +1249,17 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) goto badarg; } if (words > 1) { - comp_term = BIF_ARG_1; + comp_term = argument; } else { - comp_term = CAR(list_val(BIF_ARG_1)); + comp_term = CAR(list_val(argument)); } - } else if (is_binary(BIF_ARG_1)) { - if (binary_bitsize(BIF_ARG_1) != 0) { + } else if (is_binary(argument)) { + if (binary_bitsize(argument) != 0) { goto badarg; } words = 1; - comp_term = BIF_ARG_1; - characters = binary_size(BIF_ARG_1); + comp_term = argument; + characters = binary_size(argument); } if (characters == 0) { @@ -1261,35 +1268,33 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) ASSERT(words > 0); if (words == 1) { - Eterm ret; byte *bytes; Uint bitoffs, bitsize; byte *temp_alloc = NULL; MyAllocator my; BMData *bmd; - Eterm *hp; + Binary *bin; ERTS_GET_BINARY_BYTES(comp_term, bytes, bitoffs, bitsize); if (bitoffs != 0) { bytes = erts_get_aligned_binary_bytes(comp_term, &temp_alloc); } 
- bmd = create_bmdata(&my, BIF_P, bytes, characters, &ret); + bmd = create_bmdata(&my, bytes, characters, &bin); bmd->ret_tuple = return_tuple; compute_badshifts(bmd); compute_goodshifts(bmd); erts_free_aligned_binary_bytes(temp_alloc); CHECK_ALLOCATOR(my); - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, am_bm, ret); - BIF_RET(ret); + *tag = am_bm; + *binp = bin; + return 0; } else { - Eterm ret; ACTrie *act; MyAllocator my; - Eterm *hp; ACNode **qbuff; + Binary *bin; - act = create_acdata(&my, BIF_P, characters, &qbuff, &ret); + act = create_acdata(&my, characters, &qbuff, &bin); t = comp_term; while (is_list(t)) { byte *bytes; @@ -1307,18 +1312,37 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) ac_compute_failure_functions(act,qbuff); CHECK_ALLOCATOR(my); erts_free(ERTS_ALC_T_TMP,qbuff); - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, am_ac, ret); - BIF_RET(ret); + *tag = am_ac; + *binp = bin; + return 0; } badarg: - BIF_ERROR(BIF_P,BADARG); + return -1; +} + +BIF_RETTYPE binary_compile_pattern_1(BIF_ALIST_1) +{ + Binary *bin; + Eterm tag, ret; + Eterm *hp; + + if (do_binary_match_compile(BIF_ARG_1,&tag,&bin)) { + BIF_ERROR(BIF_P,BADARG); + } + hp = HAlloc(BIF_P, PROC_BIN_SIZE+3); + ret = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); + ret = TUPLE2(hp, tag, ret); + BIF_RET(ret); } + BIF_RETTYPE binary_match_3(BIF_ALIST_3) { Uint hsstart, hslen; Eterm *tp; + Eterm type; + Binary *bin; + Eterm bin_term = NIL; if (is_not_binary(BIF_ARG_1)) { goto badarg; } @@ -1355,85 +1379,90 @@ BIF_RETTYPE binary_match_3(BIF_ALIST_3) if (arityval(*tp) != 2 || is_not_atom(tp[1])) { goto badarg; } - if (tp[1] == am_bm && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - Binary *mbp; - BMData *bm; - Sint pos; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret; - Eterm *hp; - mbp = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { - goto badarg; - } - bm = (BMData *) ERTS_MAGIC_BIN_DATA(mbp); + if (((tp[1] != am_bm) && (tp[1] 
!= am_ac)) || + !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + goto badarg; + } + type = tp[1]; + bin = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { + goto badarg; + } + bin_term = tp[2]; + } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { + goto badarg; + } + + if (type == am_bm) { + BMData *bm; + Sint pos; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG - dump_bm_data(bm); + dump_bm_data(bm); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - pos = bm_find_first_match(bm, bytes + hsstart, hslen); - if (pos < 0) { - ret = am_nomatch; - } else { - Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); - ret = erts_make_integer(pos+hsstart,BIF_P); - if (bm->ret_tuple) { - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, ret, erlen); - } + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + pos = bm_find_first_match(bm, bytes + hsstart, hslen); + if (pos < 0) { + ret = am_nomatch; + } else { + Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); + ret = erts_make_integer(pos+hsstart,BIF_P); + if (bm->ret_tuple) { + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, ret, erlen); } - erts_free_aligned_binary_bytes(temp_alloc); - BIF_RET(ret); - } else if (tp[1] == am_ac && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - Binary *mbp; - ACTrie *act; - Uint pos, msn,rlen; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret; - Eterm *hp; + } + erts_free_aligned_binary_bytes(temp_alloc); + if (bin_term == NIL) { + erts_bin_free(bin); + } + BIF_RET(ret); + } else if (type == am_ac) { + ACTrie *act; + Uint pos, msn,rlen; + byte 
*bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; - mbp = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { - goto badarg; - } - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(mbp); + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG - dump_ac_trie(act); + dump_ac_trie(act); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - msn = ac_find_first_match(act, bytes + hsstart, - hslen, &pos, &rlen); - if (msn == 0) { - ret = am_nomatch; - } else { - Eterm epos = erts_make_integer(pos+hsstart,BIF_P); - Eterm erlen = erts_make_integer(rlen,BIF_P); - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, epos, erlen); - } - erts_free_aligned_binary_bytes(temp_alloc); - BIF_RET(ret); - } else { + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { goto badarg; } - } else { - goto badarg; /* Compilation on the fly NYI */ + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + msn = ac_find_first_match(act, bytes + hsstart, + hslen, &pos, &rlen); + if (msn == 0) { + ret = am_nomatch; + } else { + Eterm epos = erts_make_integer(pos+hsstart,BIF_P); + Eterm erlen = erts_make_integer(rlen,BIF_P); + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, epos, erlen); + } + erts_free_aligned_binary_bytes(temp_alloc); + if (bin_term == NIL) { + erts_bin_free(bin); + } + BIF_RET(ret); } badarg: BIF_ERROR(BIF_P,BADARG); @@ -1447,6 +1476,9 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) { Uint hsstart, hslen; Eterm *tp; + Eterm type; + Binary *bin; + Eterm bin_term = NIL; if (is_not_binary(BIF_ARG_1)) { goto badarg; } @@ -1483,98 +1515,103 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) if (arityval(*tp) != 2 || is_not_atom(tp[1])) { goto badarg; } - if (tp[1] == am_bm && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - Binary *mbp; - 
BMData *bm; - Sint pos; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret; - Eterm *hp; - mbp = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { - goto badarg; - } - bm = (BMData *) ERTS_MAGIC_BIN_DATA(mbp); + if (((tp[1] != am_bm) && (tp[1] != am_ac)) || + !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + goto badarg; + } + type = tp[1]; + bin = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { + goto badarg; + } + bin_term = tp[2]; + } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { + goto badarg; + } + + if (type == am_bm) { + BMData *bm; + Sint pos; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG - dump_bm_data(bm); + dump_bm_data(bm); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - pos = bm_find_first_match(bm, bytes + hsstart, hslen); - if (pos < 0) { - ret = am_nomatch; - } else { - Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); - ret = erts_make_integer(pos,BIF_P); - if (bm->ret_tuple) { - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, ret, erlen); - } + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + pos = bm_find_first_match(bm, bytes + hsstart, hslen); + if (pos < 0) { + ret = am_nomatch; + } else { + Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); + ret = erts_make_integer(pos,BIF_P); + if (bm->ret_tuple) { + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, ret, erlen); } - erts_free_aligned_binary_bytes(temp_alloc); - BIF_RET(ret); - } else if (tp[1] == am_ac && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - Binary *mbp; - ACTrie *act; - 
Uint rlen; - Sint i; - FindallData *fad; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret,tpl; - Eterm *hp; + } + erts_free_aligned_binary_bytes(temp_alloc); + if (bin_term == NIL) { + erts_bin_free(bin); + } + BIF_RET(ret); + } else if (type == am_ac) { + ACTrie *act; + Uint rlen; + Sint i; + FindallData *fad; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret,tpl; + Eterm *hp; - mbp = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { - goto badarg; - } - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(mbp); + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG - dump_ac_trie(act); + dump_ac_trie(act); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - rlen = ac_find_all_non_overlapping(act, bytes + hsstart, - hslen, &fad); - if (rlen == 0) { - ret = am_nomatch; - } else { - for (i = 0; i < rlen; ++i) { - fad[i].epos = erts_make_integer(fad[i].pos,BIF_P); - fad[i].elen = erts_make_integer(fad[i].len,BIF_P); - } - hp = HAlloc(BIF_P,rlen * (3 + 2)); - ret = NIL; - for (i = rlen - 1; i >= 0; --i) { - tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); - hp +=3; - ret = CONS(hp,tpl,ret); - hp += 2; - } + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + rlen = ac_find_all_non_overlapping(act, bytes + hsstart, + hslen, &fad); + if (rlen == 0) { + ret = am_nomatch; + } else { + for (i = 0; i < rlen; ++i) { + fad[i].epos = erts_make_integer(fad[i].pos,BIF_P); + fad[i].elen = erts_make_integer(fad[i].len,BIF_P); } - erts_free_aligned_binary_bytes(temp_alloc); - if (fad != NULL) { - erts_free(ERTS_ALC_T_TMP,fad); + hp = HAlloc(BIF_P,rlen * (3 + 2)); + ret = NIL; + for (i = rlen - 1; i >= 0; 
--i) { + tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); + hp +=3; + ret = CONS(hp,tpl,ret); + hp += 2; } - BIF_RET(ret); - } else { - goto badarg; } - } else { - goto badarg; /* Compilation on the fly NYI */ + erts_free_aligned_binary_bytes(temp_alloc); + if (fad != NULL) { + erts_free(ERTS_ALC_T_TMP,fad); + } + if (bin_term == NIL) { + erts_bin_free(bin); + } + BIF_RET(ret); } badarg: BIF_ERROR(BIF_P,BADARG); -- cgit v1.2.3 From ba8c9c7c1594b4870936814caf3520a0f4e312f7 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 16 Apr 2010 18:11:49 +0200 Subject: Teach BIF's binary:match/matches interrupting/restarting Add Boyer More implementation of binary:matches. Cleanup and removed unused code. --- erts/emulator/beam/atom.names | 2 + erts/emulator/beam/binary.c | 861 +++++++++++++++++++++++++++++++----------- erts/emulator/beam/erl_init.c | 1 + erts/emulator/beam/global.h | 1 + 4 files changed, 646 insertions(+), 219 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index e63d56b32a..4e3284a4cd 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -101,6 +101,8 @@ atom band atom big atom bif_return_trap atom binary +atom binary_match_trap +atom binary_matches_trap atom block atom blocked atom bm diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 4662e60d51..2b110e8b82 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -676,21 +676,45 @@ bitstr_list_len(Eterm obj) return (Sint) -1; } -#define HARDDEBUG - /* * The native implementation functions for the module binary. * Searching is implemented using aither Boyer-More or Aho-Corasick * depending on number of searchstrings (BM if one, AC if more than one). - * Native implementation is for efficiency, nothing really *needs* to be - * implemented in native code. 
+ * Native implementation is mostly for efficiency, nothing (except binary:referenced_byte_size) + * really *needs* to be implemented in native code. */ +/* #define HARDDEBUG */ + /* * A micro allocator used when building search structures, just a convenience * for building structures inside a pre alocated magic binary using conventional * malloc-like interface. */ +static Export binary_match_trap_export; +static BIF_RETTYPE binary_match_trap(BIF_ALIST_3); +static Export binary_matches_trap_export; +static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3); +void erts_init_bif_binary(void) +{ + sys_memset((void *) &binary_match_trap_export, 0, sizeof(Export)); + binary_match_trap_export.address = &binary_match_trap_export.code[3]; + binary_match_trap_export.code[0] = am_erlang; + binary_match_trap_export.code[1] = am_binary_match_trap; + binary_match_trap_export.code[2] = 3; + binary_match_trap_export.code[3] = (BeamInstr) em_apply_bif; + binary_match_trap_export.code[4] = (BeamInstr) &binary_match_trap; + + sys_memset((void *) &binary_matches_trap_export, 0, sizeof(Export)); + binary_matches_trap_export.address = &binary_matches_trap_export.code[3]; + binary_matches_trap_export.code[0] = am_erlang; + binary_matches_trap_export.code[1] = am_binary_matches_trap; + binary_matches_trap_export.code[2] = 3; + binary_matches_trap_export.code[3] = (BeamInstr) em_apply_bif; + binary_matches_trap_export.code[4] = (BeamInstr) &binary_matches_trap; + + return; +} #define MYALIGN(Size) (SIZEOF_VOID_P * (((Size) / SIZEOF_VOID_P) + \ !!(((Size) % SIZEOF_VOID_P)))) @@ -961,21 +985,55 @@ static void ac_compute_failure_functions(ACTrie *act, ACNode **qbuff) * The actual searching for needles in the haystack... * Find first match using Aho-Coracick Trie * return pattern number and fill in mpos + mlen if found, otherwise return 0 - * Return the matching pattern that *starts* first, not ends - * first (difference when overlapping), hence the candidate thing. 
+ * Return the matching pattern that *starts* first, and ends + * last (difference when overlapping), hence the candidate thing. * Basic AC finds the first end before the first start... * */ -static Uint ac_find_first_match(ACTrie *act, byte *haystack, Uint len, - Uint *mpos, Uint *mlen) +typedef struct { + ACNode *q; + Uint pos; + Uint len; + ACNode *candidate; + Uint candidate_start; +} ACFindFirstState; + + +static void ac_init_find_first_match(ACFindFirstState *state, ACTrie *act, Sint startpos, Uint len) { - ACNode *q = act->root; - Uint i = 0; - ACNode *candidate = NULL, *r; - Uint candidate_start = 0 /* Init not needed, just quiet the compiler */; + state->q = act->root; + state->pos = startpos; + state->len = len; + state->candidate = NULL; + state->candidate_start = 0; +} +#define AC_OK 0 +#define AC_NOT_FOUND -1 +#define AC_RESTART -2 + +#define AC_LOOP_FACTOR 1 + +static int ac_find_first_match(ACFindFirstState *state, byte *haystack, + Uint *mpos, Uint *mlen, Uint reductions) +{ + ACNode *q = state->q; + Uint i = state->pos; + ACNode *candidate = state->candidate, *r; + Uint len = state->len; + Uint candidate_start = state->candidate_start; Uint rstart; + register Uint reds = (Uint) reductions; while (i < len) { + if (--reds == 0) { + state->q = q; + state->pos = i; + state->len = len; + state->candidate = candidate; + state->candidate_start = candidate_start; + return AC_RESTART; + } + while (q->g[haystack[i]] == NULL && q->h != q) { q = q->h; } @@ -1002,14 +1060,14 @@ static Uint ac_find_first_match(ACTrie *act, byte *haystack, Uint len, } } if (!candidate) { - return 0; + return AC_NOT_FOUND; } #ifdef HARDDEBUG dump_ac_node(candidate,0,'?'); #endif *mpos = candidate_start; *mlen = candidate->d; - return candidate->final; + return AC_OK; } typedef struct _findall_data { @@ -1020,35 +1078,86 @@ typedef struct _findall_data { #endif Eterm epos; Eterm elen; -#if 0 - Eterm eid; -#endif } FindallData; + +typedef struct { + ACNode *q; + Uint pos; + Uint 
len; + Uint m; + Uint allocated; + FindallData *out; +} ACFindAllState; + +static void ac_init_find_all(ACFindAllState *state, ACTrie *act, Sint startpos, Uint len) +{ + state->q = act->root; + state->pos = startpos; + state->len = len; + state->m = 0; + state->allocated = 0; + state->out = NULL; +} + +static void ac_restore_find_all(ACFindAllState *state, char *buff) +{ + memcpy(state,buff,sizeof(ACFindAllState)); + state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated)); + memcpy(state->out,buff+sizeof(ACFindAllState),sizeof(FindallData)*state->m); +} + +static void ac_serialize_find_all(ACFindAllState *state, char *buff) +{ + memcpy(buff,state,sizeof(ACFindAllState)); + memcpy(buff+sizeof(ACFindAllState),state->out,sizeof(FindallData)*state->m); +} + +static void ac_clean_find_all(ACFindAllState *state) +{ + if (state->out != NULL) { + erts_free(ERTS_ALC_T_TMP, state->out); + } +#ifdef HARDDEBUG + state->out = NULL; + state->allocated = 0; +#endif +} + +#define SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(S) (sizeof(ACFindAllState)+(sizeof(FindallData)*(S).m)) + /* - * Returns number of non overlapping matches + * Differs from the find_first function in that it stores all matches and the values + * are returned only in the state.
*/ -static Uint ac_find_all_non_overlapping(ACTrie *act, byte *haystack, Uint len, - FindallData **data) +static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, Uint reductions) { - ACNode *q = act->root; - Uint i = 0; + ACNode *q = state->q; + Uint i = state->pos; Uint rstart; ACNode *r; - Uint m = 0, save_m; - Uint allocated = 0; - FindallData *out = NULL; + Uint len = state->len; + Uint m = state->m, save_m; + Uint allocated = state->allocated; + FindallData *out = state->out; + register Uint reds = (Uint) reductions; while (i < len) { + if (--reds == 0) { + state->q = q; + state->pos = i; + state->len = len; + state->m = m; + state->allocated = allocated; + state->out = out; + return AC_RESTART; + } while (q->g[haystack[i]] == NULL && q->h != q) { q = q->h; } if (q->g[haystack[i]] != NULL) { q = q->g[haystack[i]]; } -#ifdef HARDDEBUG - erts_printf("ch = %c, Current: %u\n", (int) haystack[i], (unsigned) q->id); -#endif ++i; if (q->final) { r = q; @@ -1105,8 +1214,9 @@ static Uint ac_find_all_non_overlapping(ACTrie *act, byte *haystack, Uint len, } } } - *data = out; - return m; + state->m = m; + state->out = out; + return (m == 0) ? 
AC_NOT_FOUND : AC_OK; } /* @@ -1192,16 +1302,39 @@ static void compute_goodshifts(BMData *bmd) erts_free(ERTS_ALC_T_TMP, suffixes); } -static Sint bm_find_first_match(BMData *bmd, byte *haystack, Uint len) +typedef struct { + Sint pos; + Uint len; +} BMFindFirstState; + +#define BM_OK 0 /* used only for find_all */ +#define BM_NOT_FOUND -1 +#define BM_RESTART -2 +#define BM_LOOP_FACTOR 1 + +static void bm_init_find_first_match(BMFindFirstState *state, Sint startpos, Uint len) +{ + state->pos = startpos; + state->len = len; +} + + +static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *haystack, Uint reductions) { Sint blen = bmd->len; + Uint len = state->len; Sint *gs = bmd->goodshift; Sint *bs = bmd->badshift; byte *needle = bmd->x; Sint i; - Sint j = 0; + Sint j = state->pos; + register Uint reds = reductions; while (j <= len - blen) { + if (--reds == 0) { + state->pos = j; + return BM_RESTART; + } for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) ; if (i < 0) { /* found */ @@ -1209,7 +1342,103 @@ static Sint bm_find_first_match(BMData *bmd, byte *haystack, Uint len) } j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); } - return -1; + return BM_NOT_FOUND; +} + +typedef struct { + Sint pos; + Uint len; + Uint m; + Uint allocated; + FindallData *out; +} BMFindAllState; + +static void bm_init_find_all(BMFindAllState *state, Sint startpos, Uint len) +{ + state->pos = startpos; + state->len = len; + state->m = 0; + state->allocated = 0; + state->out = NULL; +} + +static void bm_restore_find_all(BMFindAllState *state, char *buff) +{ + memcpy(state,buff,sizeof(BMFindAllState)); + state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated)); + memcpy(state->out,buff+sizeof(BMFindAllState),sizeof(FindallData)*state->m); +} + +static void bm_serialize_find_all(BMFindAllState *state, char *buff) +{ + memcpy(buff,state,sizeof(BMFindAllState)); + 
memcpy(buff+sizeof(BMFindAllState),state->out,sizeof(FindallData)*state->m); +} + +static void bm_clean_find_all(BMFindAllState *state) +{ + if (state->out != NULL) { + erts_free(ERTS_ALC_T_TMP, state->out); + } +#ifdef HARDDEBUG + state->out = NULL; + state->allocated = 0; +#endif +} + +#define SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(S) (sizeof(BMFindAllState)+(sizeof(FindallData)*(S).m)) + +/* + * Differs from the find_first function in that it stores all matches and the values + * are returned only in the state. + */ +static Sint bm_find_all_non_overlapping(BMFindAllState *state, + BMData *bmd, byte *haystack, Uint reductions) +{ + Sint blen = bmd->len; + Uint len = state->len; + Sint *gs = bmd->goodshift; + Sint *bs = bmd->badshift; + byte *needle = bmd->x; + Sint i; + Sint j = state->pos; + Uint m = state->m; + Uint allocated = state->allocated; + FindallData *out = state->out; + register Uint reds = reductions; + + while (j <= len - blen) { + if (--reds == 0) { + state->pos = j; + state->m = m; + state->allocated = allocated; + state->out = out; + return BM_RESTART; + } + for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) + ; + if (i < 0) { /* found */ + if (m >= allocated) { + if (!allocated) { + allocated = 10; + out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * allocated); + } else { + allocated *= 2; + out = erts_realloc(ERTS_ALC_T_TMP, out, + sizeof(FindallData) * allocated); + } + } + out[m].pos = j; + out[m].len = blen; + ++m; + j += blen; + } else { + j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); + } + } + state->m = m; + state->out = out; + return (m == 0) ?
BM_NOT_FOUND : BM_OK; } /* @@ -1335,150 +1564,310 @@ BIF_RETTYPE binary_compile_pattern_1(BIF_ALIST_1) BIF_RET(ret); } +#define DO_BIN_MATCH_OK 0 +#define DO_BIN_MATCH_BADARG -1 +#define DO_BIN_MATCH_RESTART -2 -BIF_RETTYPE binary_match_3(BIF_ALIST_3) +static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, + Eterm type, Binary *bin, Eterm state_term, Eterm *res_term) { - Uint hsstart, hslen; - Eterm *tp; - Eterm type; - Binary *bin; - Eterm bin_term = NIL; - if (is_not_binary(BIF_ARG_1)) { - goto badarg; - } - if (BIF_ARG_3 == ((Eterm) 0)) { - /* Invalid term, we're called from binary_match_2... */ - hsstart = 0; - hslen = binary_size(BIF_ARG_1); - } else if (is_tuple(BIF_ARG_3)) { - tp = tuple_val(BIF_ARG_3); - if (arityval(*tp) != 2) { - goto badarg; - } - if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { - goto badarg; - } - if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { - goto badarg; - } - if (hslen < hsstart) { - goto badarg; - } - if (hslen > binary_size(BIF_ARG_1)-1) { - goto badarg; /* XXX:PaN or should we take as much as we have ? 
*/ - } - hslen = hslen + 1 - hsstart; - } else { + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + + ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); + if (bitsize != 0) { goto badarg; } - if (hslen == 0) { - BIF_RET(am_nomatch); + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); } - if (is_tuple(BIF_ARG_2)) { - tp = tuple_val(BIF_ARG_2); - if (arityval(*tp) != 2 || is_not_atom(tp[1])) { - goto badarg; - } - if (((tp[1] != am_bm) && (tp[1] != am_ac)) || - !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - goto badarg; - } - type = tp[1]; - bin = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { - goto badarg; - } - bin_term = tp[2]; - } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { - goto badarg; + if (state_term != NIL) { + Eterm *ptr = big_val(state_term); + type = ptr[1]; } if (type == am_bm) { BMData *bm; Sint pos; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; Eterm ret; Eterm *hp; + BMFindFirstState state; + Uint reds = ERTS_BIF_REDS_LEFT(p) * BM_LOOP_FACTOR; + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG dump_bm_data(bm); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + if (state_term == NIL) { + bm_init_find_first_match(&state, hsstart, hslen); + } else { + Eterm *ptr = big_val(state_term); + memcpy(&state,ptr+2,sizeof(state)); } - pos = bm_find_first_match(bm, bytes + hsstart, hslen); - if (pos < 0) { +#ifdef HARDDEBUG + erts_printf("(bm) state->pos = %ld, state->len = %lu\n",state.pos, state.len); +#endif + pos = bm_find_first_match(&state, bm, bytes, reds); + if (pos == BM_NOT_FOUND) { ret = am_nomatch; + } else if (pos == BM_RESTART) { + int x = (sizeof(BMFindFirstState) / sizeof(Eterm)) + + !!(sizeof(BMFindFirstState) % sizeof(Eterm)); +#ifdef HARDDEBUG + 
erts_printf("Trap bm!\n"); +#endif + hp = HAlloc(p,x+2); + hp[0] = make_pos_bignum_header(x+1); + hp[1] = type; + memcpy(hp+2,&state,sizeof(state)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + return DO_BIN_MATCH_RESTART; } else { - Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); - ret = erts_make_integer(pos+hsstart,BIF_P); + ret = erts_make_integer(pos,p); if (bm->ret_tuple) { - hp = HAlloc(BIF_P,3); + Eterm erlen = erts_make_integer((Uint) bm->len, p); + hp = HAlloc(p,3); ret = TUPLE2(hp, ret, erlen); } } erts_free_aligned_binary_bytes(temp_alloc); - if (bin_term == NIL) { - erts_bin_free(bin); - } - BIF_RET(ret); + *res_term = ret; + return DO_BIN_MATCH_OK; } else if (type == am_ac) { ACTrie *act; - Uint pos, msn,rlen; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; + Uint pos, rlen; + int acr; + ACFindFirstState state; Eterm ret; Eterm *hp; + Uint reds = ERTS_BIF_REDS_LEFT(p) * AC_LOOP_FACTOR; act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG dump_ac_trie(act); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + if (state_term == NIL) { + ac_init_find_first_match(&state, act, hsstart, hslen); + } else { + Eterm *ptr = big_val(state_term); + memcpy(&state,ptr+2,sizeof(state)); } - msn = ac_find_first_match(act, bytes + hsstart, - hslen, &pos, &rlen); - if (msn == 0) { + acr = ac_find_first_match(&state, bytes, &pos, &rlen, reds); + if (acr == AC_NOT_FOUND) { ret = am_nomatch; + } else if (acr == AC_RESTART) { + int x = (sizeof(state) / sizeof(Eterm)) + + !!(sizeof(BMFindFirstState) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap ac!\n"); +#endif + hp = HAlloc(p,x+2); + hp[0] = make_pos_bignum_header(x+1); + hp[1] = type; + memcpy(hp+2,&state,sizeof(state)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + return 
DO_BIN_MATCH_RESTART; } else { - Eterm epos = erts_make_integer(pos+hsstart,BIF_P); - Eterm erlen = erts_make_integer(rlen,BIF_P); - hp = HAlloc(BIF_P,3); + Eterm epos = erts_make_integer(pos+hsstart,p); + Eterm erlen = erts_make_integer(rlen,p); + hp = HAlloc(p,3); ret = TUPLE2(hp, epos, erlen); } erts_free_aligned_binary_bytes(temp_alloc); - if (bin_term == NIL) { - erts_bin_free(bin); + *res_term = ret; + return DO_BIN_MATCH_OK; + } + badarg: + return DO_BIN_MATCH_BADARG; +} + +static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen, + Eterm type, Binary *bin, Eterm state_term, Eterm *res_term) +{ + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + + ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); + } + if (state_term != NIL) { + Eterm *ptr = big_val(state_term); + type = ptr[1]; + } + + if (type == am_bm) { + BMData *bm; + Sint pos; + Eterm ret,tpl; + Eterm *hp; + BMFindAllState state; + Uint reds = ERTS_BIF_REDS_LEFT(p) * BM_LOOP_FACTOR; + + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_bm_data(bm); +#endif + if (state_term == NIL) { + bm_init_find_all(&state, hsstart, hslen); + } else { + Eterm *ptr = big_val(state_term); + bm_restore_find_all(&state,(char *) (ptr+2)); + } + + pos = bm_find_all_non_overlapping(&state, bm, bytes, reds); + if (pos == BM_NOT_FOUND) { + ret = am_nomatch; + } else if (pos == BM_RESTART) { + int x = (SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + + !!(SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap bm!\n"); +#endif + hp = HAlloc(p,x+2); + hp[0] = make_pos_bignum_header(x+1); + hp[1] = type; + bm_serialize_find_all(&state, (char *) (hp+2)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + bm_clean_find_all(&state); + return DO_BIN_MATCH_RESTART; + 
} else { + FindallData *fad = state.out; + int i; + for (i = 0; i < state.m; ++i) { + fad[i].epos = erts_make_integer(fad[i].pos,p); + fad[i].elen = erts_make_integer(fad[i].len,p); + } + hp = HAlloc(p,state.m * (3 + 2)); + ret = NIL; + for (i = state.m - 1; i >= 0; --i) { + tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); + hp +=3; + ret = CONS(hp,tpl,ret); + hp += 2; + } + } + erts_free_aligned_binary_bytes(temp_alloc); + bm_clean_find_all(&state); + *res_term = ret; + return DO_BIN_MATCH_OK; + } else if (type == am_ac) { + ACTrie *act; + int acr; + ACFindAllState state; + Eterm ret,tpl; + Eterm *hp; + Uint reds = ERTS_BIF_REDS_LEFT(p) * AC_LOOP_FACTOR; + + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_ac_trie(act); +#endif + if (state_term == NIL) { + ac_init_find_all(&state, act, hsstart, hslen); + } else { + Eterm *ptr = big_val(state_term); + ac_restore_find_all(&state,(char *) (ptr+2)); } - BIF_RET(ret); + acr = ac_find_all_non_overlapping(&state, bytes, reds); + if (acr == AC_NOT_FOUND) { + ret = am_nomatch; + } else if (acr == AC_RESTART) { + int x = (SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + + !!(SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap ac!\n"); +#endif + hp = HAlloc(p,x+2); + hp[0] = make_pos_bignum_header(x+1); + hp[1] = type; + ac_serialize_find_all(&state, (char *) (hp+2)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + ac_clean_find_all(&state); + return DO_BIN_MATCH_RESTART; + } else { + FindallData *fad = state.out; + int i; + for (i = 0; i < state.m; ++i) { + fad[i].epos = erts_make_integer(fad[i].pos,p); + fad[i].elen = erts_make_integer(fad[i].len,p); + } + hp = HAlloc(p,state.m * (3 + 2)); + ret = NIL; + for (i = state.m - 1; i >= 0; --i) { + tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); + hp +=3; + ret = CONS(hp,tpl,ret); + hp += 2; + } + } + erts_free_aligned_binary_bytes(temp_alloc); + ac_clean_find_all(&state); + 
*res_term = ret; + return DO_BIN_MATCH_OK; + } + badarg: + return DO_BIN_MATCH_BADARG; +} + +static BIF_RETTYPE binary_match_trap(BIF_ALIST_3) +{ + int runres; + Eterm result; + Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; + runres = do_binary_match(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result); + switch (runres) { + case DO_BIN_MATCH_OK: + BIF_RET(result); + case DO_BIN_MATCH_RESTART: + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3); + default: + goto badarg; } badarg: BIF_ERROR(BIF_P,BADARG); } -BIF_RETTYPE binary_match_2(BIF_ALIST_2) + +static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3) { - return binary_match_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); + int runres; + Eterm result; + Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; + runres = do_binary_matches(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result); + switch (runres) { + case DO_BIN_MATCH_OK: + BIF_RET(result); + case DO_BIN_MATCH_RESTART: + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3); + default: + goto badarg; + } + badarg: + BIF_ERROR(BIF_P,BADARG); } -BIF_RETTYPE binary_matches_3(BIF_ALIST_3) + +BIF_RETTYPE binary_match_3(BIF_ALIST_3) { Uint hsstart, hslen; Eterm *tp; Eterm type; Binary *bin; Eterm bin_term = NIL; + int runres; + Eterm result; + if (is_not_binary(BIF_ARG_1)) { goto badarg; } @@ -1486,25 +1875,33 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) /* Invalid term, we're called from binary_match_2... 
*/ hsstart = 0; hslen = binary_size(BIF_ARG_1); - } else if (is_tuple(BIF_ARG_3)) { - tp = tuple_val(BIF_ARG_3); - if (arityval(*tp) != 2) { - goto badarg; - } - if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { - goto badarg; - } - if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { - goto badarg; - } - if (hslen < hsstart) { - goto badarg; - } - if (hslen > binary_size(BIF_ARG_1)-1) { - goto badarg; /* XXX:PaN or should we take as much as we have ? */ + } else if (is_list(BIF_ARG_3)) { + Eterm l = BIF_ARG_3; + while(is_list(l)) { + Eterm t = CAR(list_val(l)); + if (!is_tuple(t)) { + goto badarg; + } + tp = tuple_val(t); + if (arityval(*tp) != 2) { + goto badarg; + } + if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { + goto badarg; + } + if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { + goto badarg; + } + if (hslen < hsstart) { + goto badarg; + } + if (hslen > binary_size(BIF_ARG_1)-1) { + goto badarg; /* XXX:PaN or should we take as much as we have ? 
*/ + } + hslen = hslen + 1 - hsstart; + l = CDR(list_val(l)); } - hslen = hslen + 1 - hsstart; - } else { + } else if (BIF_ARG_3 != NIL) { goto badarg; } if (hslen == 0) { @@ -1528,94 +1925,120 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { goto badarg; } + runres = do_binary_match(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result); + if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { + Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE+3); + bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); + } else if (bin_term == NIL) { + erts_bin_free(bin); + } + switch (runres) { + case DO_BIN_MATCH_OK: + BIF_RET(result); + case DO_BIN_MATCH_RESTART: + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, bin_term); + default: + goto badarg; + } + badarg: + BIF_ERROR(BIF_P,BADARG); +} - if (type == am_bm) { - BMData *bm; - Sint pos; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret; - Eterm *hp; - bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_bm_data(bm); -#endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - pos = bm_find_first_match(bm, bytes + hsstart, hslen); - if (pos < 0) { - ret = am_nomatch; - } else { - Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); - ret = erts_make_integer(pos,BIF_P); - if (bm->ret_tuple) { - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, ret, erlen); - } - } - erts_free_aligned_binary_bytes(temp_alloc); - if (bin_term == NIL) { - erts_bin_free(bin); - } - BIF_RET(ret); - } else if (type == am_ac) { - ACTrie *act; - Uint rlen; - Sint i; - FindallData *fad; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret,tpl; - Eterm *hp; +BIF_RETTYPE binary_matches_3(BIF_ALIST_3) +{ + Uint hsstart, hslen; + Eterm *tp; + Eterm type; + Binary 
*bin; + Eterm bin_term = NIL; + int runres; + Eterm result; - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_ac_trie(act); -#endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - rlen = ac_find_all_non_overlapping(act, bytes + hsstart, - hslen, &fad); - if (rlen == 0) { - ret = am_nomatch; - } else { - for (i = 0; i < rlen; ++i) { - fad[i].epos = erts_make_integer(fad[i].pos,BIF_P); - fad[i].elen = erts_make_integer(fad[i].len,BIF_P); + if (is_not_binary(BIF_ARG_1)) { + goto badarg; + } + if (BIF_ARG_3 == ((Eterm) 0)) { + /* Invalid term, we're called from binary_matches_2... */ + hsstart = 0; + hslen = binary_size(BIF_ARG_1); + } else if (is_list(BIF_ARG_3)) { + Eterm l = BIF_ARG_3; + while(is_list(l)) { + Eterm t = CAR(list_val(l)); + if (!is_tuple(t)) { + goto badarg; } - hp = HAlloc(BIF_P,rlen * (3 + 2)); - ret = NIL; - for (i = rlen - 1; i >= 0; --i) { - tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); - hp +=3; - ret = CONS(hp,tpl,ret); - hp += 2; + tp = tuple_val(t); + if (arityval(*tp) != 2) { + goto badarg; + } + if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { + goto badarg; } + if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { + goto badarg; + } + if (hslen < hsstart) { + goto badarg; + } + if (hslen > binary_size(BIF_ARG_1)-1) { + goto badarg; /* XXX:PaN or should we take as much as we have ? 
*/ + } + hslen = hslen + 1 - hsstart; + l = CDR(list_val(l)); } - erts_free_aligned_binary_bytes(temp_alloc); - if (fad != NULL) { - erts_free(ERTS_ALC_T_TMP,fad); + } else if (BIF_ARG_3 != NIL) { + goto badarg; + } + if (hslen == 0) { + BIF_RET(am_nomatch); + } + if (is_tuple(BIF_ARG_2)) { + tp = tuple_val(BIF_ARG_2); + if (arityval(*tp) != 2 || is_not_atom(tp[1])) { + goto badarg; } - if (bin_term == NIL) { - erts_bin_free(bin); + if (((tp[1] != am_bm) && (tp[1] != am_ac)) || + !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + goto badarg; } - BIF_RET(ret); + type = tp[1]; + bin = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { + goto badarg; + } + bin_term = tp[2]; + } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { + goto badarg; + } + runres = do_binary_matches(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result); + if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { + Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE+3); + bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); + } else if (bin_term == NIL) { + erts_bin_free(bin); + } + switch (runres) { + case DO_BIN_MATCH_OK: + BIF_RET(result); + case DO_BIN_MATCH_RESTART: + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, bin_term); + default: + goto badarg; } badarg: BIF_ERROR(BIF_P,BADARG); } + + +BIF_RETTYPE binary_match_2(BIF_ALIST_2) +{ + return binary_match_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); +} + + BIF_RETTYPE binary_matches_2(BIF_ALIST_2) { return binary_matches_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index e63ec8a3cc..f2e71ae98d 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -281,6 +281,7 @@ erl_init(void) init_load(); erts_init_bif(); erts_init_bif_chksum(); + erts_init_bif_binary(); erts_init_bif_re(); erts_init_unicode(); /* after RE to get access to PCRE unicode */ erts_delay_trap = 
erts_export_put(am_erlang, am_delay_trap, 2); diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index fbb40e4202..f25b082049 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1575,6 +1575,7 @@ extern int erts_cpu_timestamp; void erts_init_bif_chksum(void); /* erl_bif_re.c */ void erts_init_bif_re(void); +void erts_init_bif_binary(void); Sint erts_re_set_loop_limit(Sint limit); /* erl_unicode.c */ void erts_init_unicode(void); -- cgit v1.2.3 From f06f499690ef1f5c8659128095a82d6c9b834d68 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Tue, 20 Apr 2010 17:32:48 +0200 Subject: Add random compare testcase Fix heap-hole when trapping in binary.c Fix boyer more segfaulting when searchstring is longer than haystack --- erts/emulator/beam/binary.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 2b110e8b82..f51815d615 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -782,7 +782,6 @@ typedef struct _ac_trie { } ACTrie; typedef struct _bm_data { - int ret_tuple; byte *x; Sint len; Sint *goodshift; @@ -868,7 +867,6 @@ static BMData *create_bmdata(MyAllocator *my, byte *x, Uint len, Binary **the_bi memcpy(bmd->x,x,len); bmd->len = len; bmd->goodshift = my_alloc(my,sizeof(Uint) * len); - bmd->ret_tuple = 0; *the_bin = mb; return bmd; } @@ -1304,7 +1302,7 @@ static void compute_goodshifts(BMData *bmd) typedef struct { Sint pos; - Uint len; + Sint len; } BMFindFirstState; #define BM_OK 0 /* used only for find_all */ @@ -1315,14 +1313,14 @@ typedef struct { static void bm_init_find_first_match(BMFindFirstState *state, Sint startpos, Uint len) { state->pos = startpos; - state->len = len; + state->len = (Sint) len; } static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *haystack, Uint reductions) { Sint blen = bmd->len; - Uint len = state->len; + 
Sint len = state->len; Sint *gs = bmd->goodshift; Sint *bs = bmd->badshift; byte *needle = bmd->x; @@ -1347,7 +1345,7 @@ static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *hays typedef struct { Sint pos; - Uint len; + Sint len; Uint m; Uint allocated; FindallData *out; @@ -1356,7 +1354,7 @@ typedef struct { static void bm_init_find_all(BMFindAllState *state, Sint startpos, Uint len) { state->pos = startpos; - state->len = len; + state->len = (Sint) len; state->m = 0; state->allocated = 0; state->out = NULL; @@ -1396,7 +1394,7 @@ static Sint bm_find_all_non_overlapping(BMFindAllState *state, BMData *bmd, byte *haystack, Uint reductions) { Sint blen = bmd->len; - Uint len = state->len; + Sint len = state->len; Sint *gs = bmd->goodshift; Sint *bs = bmd->badshift; byte *needle = bmd->x; @@ -1454,13 +1452,11 @@ static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp) Eterm t, b, comp_term = NIL; Uint characters; Uint words; - int return_tuple = 0; characters = 0; words = 0; if (is_list(argument)) { - return_tuple = 1; t = argument; while (is_list(t)) { b = CAR(list_val(t)); @@ -1509,7 +1505,6 @@ static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp) bytes = erts_get_aligned_binary_bytes(comp_term, &temp_alloc); } bmd = create_bmdata(&my, bytes, characters, &bin); - bmd->ret_tuple = return_tuple; compute_badshifts(bmd); compute_goodshifts(bmd); erts_free_aligned_binary_bytes(temp_alloc); @@ -1625,12 +1620,10 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, erts_free_aligned_binary_bytes(temp_alloc); return DO_BIN_MATCH_RESTART; } else { + Eterm erlen = erts_make_integer((Uint) bm->len, p); ret = erts_make_integer(pos,p); - if (bm->ret_tuple) { - Eterm erlen = erts_make_integer((Uint) bm->len, p); - hp = HAlloc(p,3); - ret = TUPLE2(hp, ret, erlen); - } + hp = HAlloc(p,3); + ret = TUPLE2(hp, ret, erlen); } erts_free_aligned_binary_bytes(temp_alloc); *res_term = ret; @@ 
-1927,7 +1920,7 @@ BIF_RETTYPE binary_match_3(BIF_ALIST_3) } runres = do_binary_match(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result); if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { - Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE+3); + Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE); bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); } else if (bin_term == NIL) { erts_bin_free(bin); @@ -2014,7 +2007,7 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) } runres = do_binary_matches(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result); if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { - Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE+3); + Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE); bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); } else if (bin_term == NIL) { erts_bin_free(bin); @@ -2054,7 +2047,6 @@ static void dump_bm_data(BMData *bm) int i,j; erts_printf("Dumping Boyer-More structure.\n"); erts_printf("=============================\n"); - erts_printf("Return tuple: %d\n",bm->ret_tuple); erts_printf("Searchstring [%ld]:\n", bm->len); erts_printf("<<"); for (i = 0; i < bm->len; ++i) { -- cgit v1.2.3 From e27516edd537045e1151dc8a95c821ba18aadf4f Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 21 Apr 2010 16:37:16 +0200 Subject: Count reductions for process even when not trapping Set loop factors to 10. Teach erts_debug:set_internal_state to limit loop factor for binary. Add random tests for matches and match with multiple searchstrings. 
--- erts/emulator/beam/binary.c | 80 +++++++++++++++++++++++++++++---------- erts/emulator/beam/erl_bif_info.c | 11 ++++++ erts/emulator/beam/global.h | 4 +- 3 files changed, 74 insertions(+), 21 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index f51815d615..6b6571a1b2 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -695,6 +695,9 @@ static Export binary_match_trap_export; static BIF_RETTYPE binary_match_trap(BIF_ALIST_3); static Export binary_matches_trap_export; static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3); +static Uint max_loop_limit; + + void erts_init_bif_binary(void) { sys_memset((void *) &binary_match_trap_export, 0, sizeof(Export)); @@ -713,9 +716,31 @@ void erts_init_bif_binary(void) binary_matches_trap_export.code[3] = (BeamInstr) em_apply_bif; binary_matches_trap_export.code[4] = (BeamInstr) &binary_matches_trap; + max_loop_limit = 0; return; } +Sint erts_binary_set_loop_limit(Sint limit) +{ + Sint save = (Sint) max_loop_limit; + if (limit <= 0) { + max_loop_limit = 0; + } else { + max_loop_limit = (Uint) limit; + } + return save; +} + +static Uint get_reds(Process *p, int loop_factor) +{ + Uint reds = ERTS_BIF_REDS_LEFT(p) * loop_factor; + Uint tmp = max_loop_limit; + if (tmp != 0 && tmp < reds) { + return tmp; + } + return reds; +} + #define MYALIGN(Size) (SIZEOF_VOID_P * (((Size) / SIZEOF_VOID_P) + \ !!(((Size) % SIZEOF_VOID_P)))) @@ -979,6 +1004,7 @@ static void ac_compute_failure_functions(ACTrie *act, ACNode **qbuff) root->h = root; } + /* * The actual searching for needles in the haystack... 
* Find first match using Aho-Coracick Trie @@ -1009,10 +1035,10 @@ static void ac_init_find_first_match(ACFindFirstState *state, ACTrie *act, Sint #define AC_NOT_FOUND -1 #define AC_RESTART -2 -#define AC_LOOP_FACTOR 1 +#define AC_LOOP_FACTOR 10 static int ac_find_first_match(ACFindFirstState *state, byte *haystack, - Uint *mpos, Uint *mlen, Uint reductions) + Uint *mpos, Uint *mlen, Uint *reductions) { ACNode *q = state->q; Uint i = state->pos; @@ -1020,7 +1046,7 @@ static int ac_find_first_match(ACFindFirstState *state, byte *haystack, Uint len = state->len; Uint candidate_start = state->candidate_start; Uint rstart; - register Uint reds = (Uint) reductions; + register Uint reds = *reductions; while (i < len) { if (--reds == 0) { @@ -1057,6 +1083,7 @@ static int ac_find_first_match(ACFindFirstState *state, byte *haystack, } } } + *reductions = reds; if (!candidate) { return AC_NOT_FOUND; } @@ -1127,7 +1154,8 @@ static void ac_clean_find_all(ACFindAllState *state) * Differs to the find_first function in that it stores all matches and the values * arte returned only in the state. */ -static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, Uint reductions) +static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, + Uint *reductions) { ACNode *q = state->q; Uint i = state->pos; @@ -1137,7 +1165,7 @@ static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, Ui Uint m = state->m, save_m; Uint allocated = state->allocated; FindallData *out = state->out; - register Uint reds = (Uint) reductions; + register Uint reds = *reductions; while (i < len) { @@ -1212,6 +1240,7 @@ static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, Ui } } } + *reductions = reds; state->m = m; state->out = out; return (m == 0) ? 
AC_NOT_FOUND : AC_OK; @@ -1308,7 +1337,7 @@ typedef struct { #define BM_OK 0 /* used only for find_all */ #define BM_NOT_FOUND -1 #define BM_RESTART -2 -#define BM_LOOP_FACTOR 1 +#define BM_LOOP_FACTOR 10 /* Should we have a higher value? */ static void bm_init_find_first_match(BMFindFirstState *state, Sint startpos, Uint len) { @@ -1317,7 +1346,7 @@ static void bm_init_find_first_match(BMFindFirstState *state, Sint startpos, Uin } -static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *haystack, Uint reductions) +static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *haystack, Uint *reductions) { Sint blen = bmd->len; Sint len = state->len; @@ -1326,7 +1355,7 @@ static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *hays byte *needle = bmd->x; Sint i; Sint j = state->pos; - register Uint reds = reductions; + register Uint reds = *reductions; while (j <= len - blen) { if (--reds == 0) { @@ -1336,10 +1365,12 @@ static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *hays for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) ; if (i < 0) { /* found */ + *reductions = reds; return j; } j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); } + *reductions = reds; return BM_NOT_FOUND; } @@ -1391,7 +1422,7 @@ static void bm_clean_find_all(BMFindAllState *state) * arte returned only in the state. 
*/ static Sint bm_find_all_non_overlapping(BMFindAllState *state, - BMData *bmd, byte *haystack, Uint reductions) + BMData *bmd, byte *haystack, Uint *reductions) { Sint blen = bmd->len; Sint len = state->len; @@ -1403,7 +1434,7 @@ static Sint bm_find_all_non_overlapping(BMFindAllState *state, Uint m = state->m; Uint allocated = state->allocated; FindallData *out = state->out; - register Uint reds = reductions; + register Uint reds = *reductions; while (j <= len - blen) { if (--reds == 0) { @@ -1436,6 +1467,7 @@ static Sint bm_find_all_non_overlapping(BMFindAllState *state, } state->m = m; state->out = out; + *reductions = reds; return (m == 0) ? BM_NOT_FOUND : BM_OK; } @@ -1588,7 +1620,8 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, Eterm ret; Eterm *hp; BMFindFirstState state; - Uint reds = ERTS_BIF_REDS_LEFT(p) * BM_LOOP_FACTOR; + Uint reds = get_reds(p, BM_LOOP_FACTOR); + Uint save_reds = reds; bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG @@ -1603,7 +1636,7 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, #ifdef HARDDEBUG erts_printf("(bm) state->pos = %ld, state->len = %lu\n",state.pos, state.len); #endif - pos = bm_find_first_match(&state, bm, bytes, reds); + pos = bm_find_first_match(&state, bm, bytes, &reds); if (pos == BM_NOT_FOUND) { ret = am_nomatch; } else if (pos == BM_RESTART) { @@ -1626,6 +1659,7 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, ret = TUPLE2(hp, ret, erlen); } erts_free_aligned_binary_bytes(temp_alloc); + BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); *res_term = ret; return DO_BIN_MATCH_OK; } else if (type == am_ac) { @@ -1635,7 +1669,8 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, ACFindFirstState state; Eterm ret; Eterm *hp; - Uint reds = ERTS_BIF_REDS_LEFT(p) * AC_LOOP_FACTOR; + Uint reds = get_reds(p, AC_LOOP_FACTOR); + Uint save_reds = reds; act = (ACTrie *) 
ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG @@ -1647,7 +1682,7 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, Eterm *ptr = big_val(state_term); memcpy(&state,ptr+2,sizeof(state)); } - acr = ac_find_first_match(&state, bytes, &pos, &rlen, reds); + acr = ac_find_first_match(&state, bytes, &pos, &rlen, &reds); if (acr == AC_NOT_FOUND) { ret = am_nomatch; } else if (acr == AC_RESTART) { @@ -1670,6 +1705,7 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, ret = TUPLE2(hp, epos, erlen); } erts_free_aligned_binary_bytes(temp_alloc); + BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); *res_term = ret; return DO_BIN_MATCH_OK; } @@ -1702,7 +1738,8 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen Eterm ret,tpl; Eterm *hp; BMFindAllState state; - Uint reds = ERTS_BIF_REDS_LEFT(p) * BM_LOOP_FACTOR; + Uint reds = get_reds(p, BM_LOOP_FACTOR); + Uint save_reds = reds; bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG @@ -1715,9 +1752,9 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen bm_restore_find_all(&state,(char *) (ptr+2)); } - pos = bm_find_all_non_overlapping(&state, bm, bytes, reds); + pos = bm_find_all_non_overlapping(&state, bm, bytes, &reds); if (pos == BM_NOT_FOUND) { - ret = am_nomatch; + ret = NIL; } else if (pos == BM_RESTART) { int x = (SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + !!(SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); @@ -1750,6 +1787,7 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen } erts_free_aligned_binary_bytes(temp_alloc); bm_clean_find_all(&state); + BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); *res_term = ret; return DO_BIN_MATCH_OK; } else if (type == am_ac) { @@ -1758,7 +1796,8 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen ACFindAllState state; Eterm ret,tpl; Eterm *hp; - Uint reds = 
ERTS_BIF_REDS_LEFT(p) * AC_LOOP_FACTOR; + Uint reds = get_reds(p, AC_LOOP_FACTOR); + Uint save_reds = reds; act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG @@ -1770,9 +1809,9 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen Eterm *ptr = big_val(state_term); ac_restore_find_all(&state,(char *) (ptr+2)); } - acr = ac_find_all_non_overlapping(&state, bytes, reds); + acr = ac_find_all_non_overlapping(&state, bytes, &reds); if (acr == AC_NOT_FOUND) { - ret = am_nomatch; + ret = NIL; } else if (acr == AC_RESTART) { int x = (SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + !!(SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); @@ -1805,6 +1844,7 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen } erts_free_aligned_binary_bytes(temp_alloc); ac_clean_find_all(&state); + BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); *res_term = ret; return DO_BIN_MATCH_OK; } diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index 18cb09d8cd..de60ca49fa 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -3567,6 +3567,17 @@ BIF_RETTYPE erts_debug_set_internal_state_2(BIF_ALIST_2) } } } + else if (ERTS_IS_ATOM_STR("binary_loop_limit", BIF_ARG_1)) { + /* Used by binary_module_SUITE (stdlib) */ + Uint max_loops; + if (is_atom(BIF_ARG_2) && ERTS_IS_ATOM_STR("default", BIF_ARG_2)) { + max_loops = erts_binary_set_loop_limit(-1); + BIF_RET(make_small(max_loops)); + } else if (term_to_Uint(BIF_ARG_2, &max_loops) != 0) { + max_loops = erts_binary_set_loop_limit(max_loops); + BIF_RET(make_small(max_loops)); + } + } else if (ERTS_IS_ATOM_STR("re_loop_limit", BIF_ARG_1)) { /* Used by re_SUITE (stdlib) */ Uint max_loops; diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index f25b082049..4745aaf9f5 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1575,8 +1575,10 @@ extern int 
erts_cpu_timestamp; void erts_init_bif_chksum(void); /* erl_bif_re.c */ void erts_init_bif_re(void); -void erts_init_bif_binary(void); Sint erts_re_set_loop_limit(Sint limit); +/* erl_bif_binary.c */ +void erts_init_bif_binary(void); +Sint erts_binary_set_loop_limit(Sint limit); /* erl_unicode.c */ void erts_init_unicode(void); Sint erts_unicode_set_loop_limit(Sint limit); -- cgit v1.2.3 From 8e8e10d9d080655edba6dedbc13d9e729f209e2e Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 21 Apr 2010 17:18:41 +0200 Subject: Move binary module bif's to erl_bif_binary.c --- erts/emulator/Makefile.in | 3 +- erts/emulator/beam/binary.c | 1467 --------------------------------- erts/emulator/beam/erl_bif_binary.c | 1548 +++++++++++++++++++++++++++++++++++ 3 files changed, 1550 insertions(+), 1468 deletions(-) create mode 100644 erts/emulator/beam/erl_bif_binary.c (limited to 'erts') diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index 9f10a0ffaa..d767194d4d 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -735,7 +735,8 @@ RUN_OBJS = \ $(OBJDIR)/erl_drv_thread.o $(OBJDIR)/erl_bif_chksum.o \ $(OBJDIR)/erl_bif_re.o $(OBJDIR)/erl_unicode.o \ $(OBJDIR)/packet_parser.o $(OBJDIR)/safe_hash.o \ - $(OBJDIR)/erl_zlib.o $(OBJDIR)/erl_nif.o + $(OBJDIR)/erl_zlib.o $(OBJDIR)/erl_nif.o \ + $(OBJDIR)/erl_bif_binary.o ifeq ($(TARGET),win32) DRV_OBJS = \ diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 6b6571a1b2..7ca3eb686d 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -676,1470 +676,3 @@ bitstr_list_len(Eterm obj) return (Sint) -1; } -/* - * The native implementation functions for the module binary. - * Searching is implemented using aither Boyer-More or Aho-Corasick - * depending on number of searchstrings (BM if one, AC if more than one). 
- * Native implementation is mostly for efficiency, nothing (except binary:referenced_byte_size) - * really *needs* to be implemented in native code. - */ - -/* #define HARDDEBUG */ - -/* - * A micro allocator used when building search structures, just a convenience - * for building structures inside a pre alocated magic binary using conventional - * malloc-like interface. - */ -static Export binary_match_trap_export; -static BIF_RETTYPE binary_match_trap(BIF_ALIST_3); -static Export binary_matches_trap_export; -static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3); -static Uint max_loop_limit; - - -void erts_init_bif_binary(void) -{ - sys_memset((void *) &binary_match_trap_export, 0, sizeof(Export)); - binary_match_trap_export.address = &binary_match_trap_export.code[3]; - binary_match_trap_export.code[0] = am_erlang; - binary_match_trap_export.code[1] = am_binary_match_trap; - binary_match_trap_export.code[2] = 3; - binary_match_trap_export.code[3] = (BeamInstr) em_apply_bif; - binary_match_trap_export.code[4] = (BeamInstr) &binary_match_trap; - - sys_memset((void *) &binary_matches_trap_export, 0, sizeof(Export)); - binary_matches_trap_export.address = &binary_matches_trap_export.code[3]; - binary_matches_trap_export.code[0] = am_erlang; - binary_matches_trap_export.code[1] = am_binary_matches_trap; - binary_matches_trap_export.code[2] = 3; - binary_matches_trap_export.code[3] = (BeamInstr) em_apply_bif; - binary_matches_trap_export.code[4] = (BeamInstr) &binary_matches_trap; - - max_loop_limit = 0; - return; -} - -Sint erts_binary_set_loop_limit(Sint limit) -{ - Sint save = (Sint) max_loop_limit; - if (limit <= 0) { - max_loop_limit = 0; - } else { - max_loop_limit = (Uint) limit; - } - return save; -} - -static Uint get_reds(Process *p, int loop_factor) -{ - Uint reds = ERTS_BIF_REDS_LEFT(p) * loop_factor; - Uint tmp = max_loop_limit; - if (tmp != 0 && tmp < reds) { - return tmp; - } - return reds; -} - -#define MYALIGN(Size) (SIZEOF_VOID_P * (((Size) / 
SIZEOF_VOID_P) + \ - !!(((Size) % SIZEOF_VOID_P)))) - -#ifdef DEBUG -#define CHECK_ALLOCATOR(My) ASSERT((My).current <= ((My).mem + (My).size)) -#else -#define CHECK_ALLOCATOR(My) /* nothing */ -#endif - -typedef struct _my_allocator { - Uint size; - byte *current; - byte *mem; -} MyAllocator; - -static void init_my_allocator(MyAllocator *my, Uint siz, byte *array) -{ - ASSERT((siz % SIZEOF_VOID_P) == 0); - my->size = siz; - my->mem = array; - my->current = my->mem; -} - -static void *my_alloc(MyAllocator *my, Uint size) -{ - void *ptr = my->current; - my->current += MYALIGN(size); - return ptr; -} - -/* - * The search functionality. - * - * The search is byte oriented, which works nicely for UTF-8 as well as latin1 data - */ - -#define ALPHABET_SIZE 256 - -typedef struct _ac_node { -#ifdef HARDDEBUG - Uint32 id; /* To identify h pointer targets when dumping */ -#endif - Uint32 d; /* Depth in trie, also represents the length - (-1) of the matched string if in - final set */ - Sint32 final; /* Members in final set represent matches. 
- * The set representation is scattered - * among the nodes in this way: - * >0 -> this represents a member of - * the final set, <0 -> member of - * final set somewhere in the failure chain, - * 0 -> not member of the final set */ - struct _ac_node *h; /* h(Hode) is the failure function */ - struct _ac_node *g[ALPHABET_SIZE]; /* g(Node,Character) is the - transition function */ -} ACNode; - -typedef struct _ac_trie { -#ifdef HARDDEBUG - Uint32 idc; -#endif - Uint32 counter; /* Number of added patterns */ - ACNode *root; /* pointer to the root state */ -} ACTrie; - -typedef struct _bm_data { - byte *x; - Sint len; - Sint *goodshift; - Sint badshift[ALPHABET_SIZE]; -} BMData; - -#ifdef HARDDEBUG -static void dump_bm_data(BMData *bm); -static void dump_ac_trie(ACTrie *act); -static void dump_ac_node(ACNode *node, int indent, int ch); -#endif - -/* - * The needed size of binary data for a search structure - given the accumulated - * string lengths. - */ -#define BM_SIZE(StrLen) /* StrLen: length of searchstring */ \ -((MYALIGN(sizeof(Sint) * (StrLen))) + /* goodshift array */ \ - MYALIGN(StrLen) + /* searchstring saved */ \ - (MYALIGN(sizeof(BMData)))) /* Structure */ - -#define AC_SIZE(StrLens) /* StrLens: sum of all searchstring lengths */ \ -((MYALIGN(sizeof(ACNode)) * \ -((StrLens)+1)) + /* The actual nodes (including rootnode) */ \ - MYALIGN(sizeof(ACTrie))) /* Structure */ - - -#ifndef MAX -#define MAX(A,B) (((A) > (B)) ? (A) : B) -#endif - -/* - * Callback for the magic binary - */ -static void cleanup_my_data(Binary *bp) -{ - return; -} - -/* - * Initiate a (allocated) micro allocator and fill in the base - * for an Aho-Corasick search trie, given the accumulated length of the search strings. 
- */ -static ACTrie *create_acdata(MyAllocator *my, Uint len, - ACNode ***qbuff /* out */,Binary **the_bin /* out */) -{ - Uint datasize = AC_SIZE(len); - ACTrie *act; - ACNode *acn; - Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); - byte *data = ERTS_MAGIC_BIN_DATA(mb); - - init_my_allocator(my, datasize, data); - act = my_alloc(my, sizeof(ACTrie)); /* Important that this is the first - allocation */ - act->counter = 0; - act->root = acn = my_alloc(my, sizeof(ACNode)); - acn->d = 0; - acn->final = 0; - acn->h = NULL; - memset(acn->g, 0, sizeof(ACNode *) * ALPHABET_SIZE); -#ifdef HARDDEBUG - act->idc = 0; - acn->id = 0; -#endif - *qbuff = erts_alloc(ERTS_ALC_T_TMP, sizeof(ACNode *) * len); - *the_bin = mb; - return act; -} - -/* - * The same initialization of allocator and basic data for Boyer-More. - */ -static BMData *create_bmdata(MyAllocator *my, byte *x, Uint len, Binary **the_bin /* out */) -{ - Uint datasize = BM_SIZE(len); - BMData *bmd; - Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); - byte *data = ERTS_MAGIC_BIN_DATA(mb); - init_my_allocator(my, datasize, data); - bmd = my_alloc(my, sizeof(BMData)); - bmd->x = my_alloc(my,len); - memcpy(bmd->x,x,len); - bmd->len = len; - bmd->goodshift = my_alloc(my,sizeof(Uint) * len); - *the_bin = mb; - return bmd; -} - -/* - * Compilation of search structures - */ - -/* - * Aho Corasick - Build a Trie and fill in the failure functions - * when all strings are added. 
- * The algorithm is nicely described by Dieter Bühler of University of Tübingen: - * http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html - */ - -/* - * Helper called ance for each search pattern - */ -static void ac_add_one_pattern(MyAllocator *my, ACTrie *act, byte *x, Uint len) -{ - ACNode *acn = act->root; - Uint32 n = ++act->counter; /* Always increase conter, even if it's a duplicate - as this shall identify the pattern in the - final set and eventually be returned to - the caller (in Erlang) */ - Uint i = 0; - - while(i < len) { - if (acn->g[x[i]] != NULL) { - /* node exists, continue */ - acn = acn->g[x[i]]; - ++i; - } else { - /* allocate a new node */ - ACNode *nn = my_alloc(my,sizeof(ACNode)); -#ifdef HARDDEBUG - nn->id = ++(act->idc); -#endif - nn->d = i+1; - nn->h = act->root; - nn->final = 0; - memset(nn->g, 0, sizeof(ACNode *) * ALPHABET_SIZE); - acn->g[x[i]] = nn; - ++i; - acn = nn; - } - } - if (acn->final == 0) { /* New pattern, add to final set */ - acn->final = n; - } -} - -/* - * Called when all search patterns are added. - */ -static void ac_compute_failure_functions(ACTrie *act, ACNode **qbuff) -{ - ACNode *root = act->root; - ACNode *parent; - int i; - int qh = 0,qt = 0; - ACNode *child, *r; - - /* Set all children of the root to have the root as failure function */ - for (i = 0; i < ALPHABET_SIZE; ++i) { - if (root->g[i] != NULL) { - root->g[i]->h = root; - /* Add to que for later traversal */ - qbuff[qt++] = root->g[i]; - } - } - - /* So, now we've handled children of the root state, traverse the - rest of the trie BF... 
*/ - while (qh < qt) { - parent = qbuff[qh++]; - for (i = 0; i < ALPHABET_SIZE; ++ i) { - if ((child = parent->g[i]) != NULL) { - /* Visit this node to */ - qbuff[qt++] = child; - /* Search for correct failure function, follow the parents failure - function until you find a similar transition funtion to this - childs */ - r = parent->h; - while (r != NULL && r->g[i] == NULL) { - r = r->h; - } - if (r == NULL) { - /* Replace NULL failures with the root as we go */ - child->h = (root->g[i] == NULL) ? root : root->g[i]; - } else { - child->h = r->g[i]; - /* - * The "final" set is scattered among the nodes. When - * the failure function points to a member of the final set, - * we have a match, but we might not see it in the current node - * if we dont mark it as a special type of final, i.e. foolow - * the failure function and you will find a real member of final - * set. This is marked with a negative string id and only done if - * this node does not represent a member in the final set. - */ - if (!(child->final) && (child->h->final)) { - child->final = -1; - } - } - } - } - } - /* Finally the failure function of the root should point to itself */ - root->h = root; -} - - -/* - * The actual searching for needles in the haystack... - * Find first match using Aho-Coracick Trie - * return pattern number and fill in mpos + mlen if found, otherwise return 0 - * Return the matching pattern that *starts* first, and ends - * last (difference when overlapping), hence the candidate thing. - * Basic AC finds the first end before the first start... 
- * - */ -typedef struct { - ACNode *q; - Uint pos; - Uint len; - ACNode *candidate; - Uint candidate_start; -} ACFindFirstState; - - -static void ac_init_find_first_match(ACFindFirstState *state, ACTrie *act, Sint startpos, Uint len) -{ - state->q = act->root; - state->pos = startpos; - state->len = len; - state->candidate = NULL; - state->candidate_start = 0; -} -#define AC_OK 0 -#define AC_NOT_FOUND -1 -#define AC_RESTART -2 - -#define AC_LOOP_FACTOR 10 - -static int ac_find_first_match(ACFindFirstState *state, byte *haystack, - Uint *mpos, Uint *mlen, Uint *reductions) -{ - ACNode *q = state->q; - Uint i = state->pos; - ACNode *candidate = state->candidate, *r; - Uint len = state->len; - Uint candidate_start = state->candidate_start; - Uint rstart; - register Uint reds = *reductions; - - while (i < len) { - if (--reds == 0) { - state->q = q; - state->pos = i; - state->len = len; - state->candidate = candidate; - state->candidate_start = candidate_start; - return AC_RESTART; - } - - while (q->g[haystack[i]] == NULL && q->h != q) { - q = q->h; - } - if (q->g[haystack[i]] != NULL) { - q = q->g[haystack[i]]; - } -#ifdef HARDDEBUG - erts_printf("ch = %c, Current: %u\n", (int) haystack[i], (unsigned) q->id); -#endif - ++i; - if (candidate != NULL && (i - q->d) > candidate_start) { - break; - } - if (q->final) { - r = q; - while (r->final < 0) - r = r->h; - rstart = i - r->d; - if (candidate == NULL || rstart < candidate_start || - (rstart == candidate_start && candidate->d < q->d)) { - candidate_start = rstart; - candidate = r; - } - } - } - *reductions = reds; - if (!candidate) { - return AC_NOT_FOUND; - } -#ifdef HARDDEBUG - dump_ac_node(candidate,0,'?'); -#endif - *mpos = candidate_start; - *mlen = candidate->d; - return AC_OK; -} - -typedef struct _findall_data { - Uint pos; - Uint len; -#ifdef HARDDEBUG - Uint id; -#endif - Eterm epos; - Eterm elen; -} FindallData; - -typedef struct { - ACNode *q; - Uint pos; - Uint len; - Uint m; - Uint allocated; - FindallData 
*out; -} ACFindAllState; - -static void ac_init_find_all(ACFindAllState *state, ACTrie *act, Sint startpos, Uint len) -{ - state->q = act->root; - state->pos = startpos; - state->len = len; - state->m = 0; - state->allocated = 0; - state->out = NULL; -} - -static void ac_restore_find_all(ACFindAllState *state, char *buff) -{ - memcpy(state,buff,sizeof(ACFindAllState)); - state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated)); - memcpy(state->out,buff+sizeof(ACFindAllState),sizeof(FindallData)*state->m); -} - -static void ac_serialize_find_all(ACFindAllState *state, char *buff) -{ - memcpy(buff,state,sizeof(ACFindAllState)); - memcpy(buff+sizeof(ACFindAllState),state->out,sizeof(FindallData)*state->m); -} - -static void ac_clean_find_all(ACFindAllState *state) -{ - if (state->out != NULL) { - erts_free(ERTS_ALC_T_TMP, state->out); - } -#ifdef HARDDEBUG - state->out = NULL; - state->allocated = 0; -#endif -} - -#define SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(S) (sizeof(ACFindAllState)+(sizeof(FindallData)*(S).m)) - -/* - * Differs to the find_first function in that it stores all matches and the values - * arte returned only in the state. 
- */ -static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, - Uint *reductions) -{ - ACNode *q = state->q; - Uint i = state->pos; - Uint rstart; - ACNode *r; - Uint len = state->len; - Uint m = state->m, save_m; - Uint allocated = state->allocated; - FindallData *out = state->out; - register Uint reds = *reductions; - - - while (i < len) { - if (--reds == 0) { - state->q = q; - state->pos = i; - state->len = len; - state->m = m; - state->allocated = allocated; - state->out = out; - return AC_RESTART; - } - while (q->g[haystack[i]] == NULL && q->h != q) { - q = q->h; - } - if (q->g[haystack[i]] != NULL) { - q = q->g[haystack[i]]; - } - ++i; - if (q->final) { - r = q; - while (r->final) { - while (r->final < 0) - r = r->h; -#ifdef HARDDEBUG - erts_printf("Trying to add %u\n",(unsigned) r->final); -#endif - rstart = i - r->d; - save_m = m; - while (m > 0 && (out[m-1].pos > rstart || - (out[m-1].pos == rstart && - out[m-1].len < r->d))) { -#ifdef HARDDEBUG - erts_printf("Popping %u\n",(unsigned) out[m-1].id); -#endif - --m; - } -#ifdef HARDDEBUG - if (m > 0) { - erts_printf("Pos %u\n",out[m-1].pos); - erts_printf("Len %u\n",out[m-1].len); - } - erts_printf("Rstart %u\n",rstart); -#endif - if (m == 0 || out[m-1].pos + out[m-1].len <= rstart) { - if (m >= allocated) { - if (!allocated) { - allocated = 10; - out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * allocated); - } else { - allocated *= 2; - out = erts_realloc(ERTS_ALC_T_TMP, out, - sizeof(FindallData) * allocated); - } - } - out[m].pos = rstart; - out[m].len = r->d; -#ifdef HARDDEBUG - out[m].id = r->final; -#endif - ++m; -#ifdef HARDDEBUG - erts_printf("Pushing %u\n",(unsigned) out[m-1].id); -#endif - } else { -#ifdef HARDDEBUG - erts_printf("Backtracking %d steps\n",save_m - m); -#endif - m = save_m; - } - r = r->h; - } - } - } - *reductions = reds; - state->m = m; - state->out = out; - return (m == 0) ? 
AC_NOT_FOUND : AC_OK; -} - -/* - * Boyer More - most obviously implemented more or less exactly as Christian Charras - * and Thierry Lecroq describes it in "Handbook of Exact String-Matching Algorithms" - * http://www-igm.univ-mlv.fr/~lecroq/string/ - */ - -/* - * Call this to compute badshifts array - */ -static void compute_badshifts(BMData *bmd) -{ - Sint i; - Sint m = bmd->len; - - for (i = 0; i < ALPHABET_SIZE; ++i) { - bmd->badshift[i] = m; - } - for (i = 0; i < m - 1; ++i) { - bmd->badshift[bmd->x[i]] = m - i - 1; - } -} - -/* Helper for "compute_goodshifts" */ -static void compute_suffixes(byte *x, Sint m, Sint *suffixes) -{ - int f,g,i; - - suffixes[m - 1] = m; - - f = 0; /* To avoid use before set warning */ - - g = m - 1; - - for (i = m - 2; i >= 0; --i) { - if (i > g && suffixes[i + m - f] < i - g) { - suffixes[i] = suffixes[i + m - 1 - f]; - } else { - if (i < g) { - g = i; - } - f = i; - while ( g >= 0 && x[g] == x[g + m - 1 - f] ) { - --g; - } - suffixes[i] = f - g; - } - } -} - -/* - * Call this to compute goodshift array - */ -static void compute_goodshifts(BMData *bmd) -{ - Sint m = bmd->len; - byte *x = bmd->x; - Sint i, j; - Sint *suffixes = erts_alloc(ERTS_ALC_T_TMP, m * sizeof(Uint)); - - compute_suffixes(x, m, suffixes); - - for (i = 0; i < m; ++i) { - bmd->goodshift[i] = m; - } - - j = 0; - - for (i = m - 1; i >= -1; --i) { - if (i == -1 || suffixes[i] == i + 1) { - while (j < m - 1 - i) { - if (bmd->goodshift[j] == m) { - bmd->goodshift[j] = m - 1 - i; - } - ++j; - } - } - } - for (i = 0; i <= m - 2; ++i) { - bmd->goodshift[m - 1 - suffixes[i]] = m - 1 - i; - } - erts_free(ERTS_ALC_T_TMP, suffixes); -} - -typedef struct { - Sint pos; - Sint len; -} BMFindFirstState; - -#define BM_OK 0 /* used only for find_all */ -#define BM_NOT_FOUND -1 -#define BM_RESTART -2 -#define BM_LOOP_FACTOR 10 /* Should we have a higher value? 
*/ - -static void bm_init_find_first_match(BMFindFirstState *state, Sint startpos, Uint len) -{ - state->pos = startpos; - state->len = (Sint) len; -} - - -static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, byte *haystack, Uint *reductions) -{ - Sint blen = bmd->len; - Sint len = state->len; - Sint *gs = bmd->goodshift; - Sint *bs = bmd->badshift; - byte *needle = bmd->x; - Sint i; - Sint j = state->pos; - register Uint reds = *reductions; - - while (j <= len - blen) { - if (--reds == 0) { - state->pos = j; - return BM_RESTART; - } - for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) - ; - if (i < 0) { /* found */ - *reductions = reds; - return j; - } - j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); - } - *reductions = reds; - return BM_NOT_FOUND; -} - -typedef struct { - Sint pos; - Sint len; - Uint m; - Uint allocated; - FindallData *out; -} BMFindAllState; - -static void bm_init_find_all(BMFindAllState *state, Sint startpos, Uint len) -{ - state->pos = startpos; - state->len = (Sint) len; - state->m = 0; - state->allocated = 0; - state->out = NULL; -} - -static void bm_restore_find_all(BMFindAllState *state, char *buff) -{ - memcpy(state,buff,sizeof(BMFindAllState)); - state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated)); - memcpy(state->out,buff+sizeof(BMFindAllState),sizeof(FindallData)*state->m); -} - -static void bm_serialize_find_all(BMFindAllState *state, char *buff) -{ - memcpy(buff,state,sizeof(BMFindAllState)); - memcpy(buff+sizeof(BMFindAllState),state->out,sizeof(FindallData)*state->m); -} - -static void bm_clean_find_all(BMFindAllState *state) -{ - if (state->out != NULL) { - erts_free(ERTS_ALC_T_TMP, state->out); - } -#ifdef HARDDEBUG - state->out = NULL; - state->allocated = 0; -#endif -} - -#define SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(S) (sizeof(BMFindAllState)+(sizeof(FindallData)*(S).m)) - -/* - * Differs to the find_first function in that it stores all matches and the values - * 
arte returned only in the state. - */ -static Sint bm_find_all_non_overlapping(BMFindAllState *state, - BMData *bmd, byte *haystack, Uint *reductions) -{ - Sint blen = bmd->len; - Sint len = state->len; - Sint *gs = bmd->goodshift; - Sint *bs = bmd->badshift; - byte *needle = bmd->x; - Sint i; - Sint j = state->pos; - Uint m = state->m; - Uint allocated = state->allocated; - FindallData *out = state->out; - register Uint reds = *reductions; - - while (j <= len - blen) { - if (--reds == 0) { - state->pos = j; - state->m = m; - state->allocated = allocated; - state->out = out; - return BM_RESTART; - } - for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) - ; - if (i < 0) { /* found */ - if (m >= allocated) { - if (!allocated) { - allocated = 10; - out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * allocated); - } else { - allocated *= 2; - out = erts_realloc(ERTS_ALC_T_TMP, out, - sizeof(FindallData) * allocated); - } - } - out[m].pos = j; - out[m].len = blen; - ++m; - j += blen; - } else { - j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); - } - } - state->m = m; - state->out = out; - *reductions = reds; - return (m == 0) ? BM_NOT_FOUND : BM_OK; -} - -/* - * Interface functions (i.e. 
"bif's") - */ - -/* - * Search functionality interfaces - */ - -static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp) -{ - Eterm t, b, comp_term = NIL; - Uint characters; - Uint words; - - characters = 0; - words = 0; - - if (is_list(argument)) { - t = argument; - while (is_list(t)) { - b = CAR(list_val(t)); - t = CDR(list_val(t)); - if (!is_binary(b)) { - goto badarg; - } - if (binary_bitsize(b) != 0) { - goto badarg; - } - ++words; - characters += binary_size(b); - } - if (is_not_nil(t)) { - goto badarg; - } - if (words > 1) { - comp_term = argument; - } else { - comp_term = CAR(list_val(argument)); - } - } else if (is_binary(argument)) { - if (binary_bitsize(argument) != 0) { - goto badarg; - } - words = 1; - comp_term = argument; - characters = binary_size(argument); - } - - if (characters == 0) { - goto badarg; - } - ASSERT(words > 0); - - if (words == 1) { - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - MyAllocator my; - BMData *bmd; - Binary *bin; - - ERTS_GET_BINARY_BYTES(comp_term, bytes, bitoffs, bitsize); - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(comp_term, &temp_alloc); - } - bmd = create_bmdata(&my, bytes, characters, &bin); - compute_badshifts(bmd); - compute_goodshifts(bmd); - erts_free_aligned_binary_bytes(temp_alloc); - CHECK_ALLOCATOR(my); - *tag = am_bm; - *binp = bin; - return 0; - } else { - ACTrie *act; - MyAllocator my; - ACNode **qbuff; - Binary *bin; - - act = create_acdata(&my, characters, &qbuff, &bin); - t = comp_term; - while (is_list(t)) { - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - b = CAR(list_val(t)); - t = CDR(list_val(t)); - ERTS_GET_BINARY_BYTES(b, bytes, bitoffs, bitsize); - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(b, &temp_alloc); - } - ac_add_one_pattern(&my,act,bytes,binary_size(b)); - erts_free_aligned_binary_bytes(temp_alloc); - } - ac_compute_failure_functions(act,qbuff); - CHECK_ALLOCATOR(my); - 
erts_free(ERTS_ALC_T_TMP,qbuff); - *tag = am_ac; - *binp = bin; - return 0; - } - badarg: - return -1; -} - -BIF_RETTYPE binary_compile_pattern_1(BIF_ALIST_1) -{ - Binary *bin; - Eterm tag, ret; - Eterm *hp; - - if (do_binary_match_compile(BIF_ARG_1,&tag,&bin)) { - BIF_ERROR(BIF_P,BADARG); - } - hp = HAlloc(BIF_P, PROC_BIN_SIZE+3); - ret = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); - ret = TUPLE2(hp, tag, ret); - BIF_RET(ret); -} - -#define DO_BIN_MATCH_OK 0 -#define DO_BIN_MATCH_BADARG -1 -#define DO_BIN_MATCH_RESTART -2 - -static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, - Eterm type, Binary *bin, Eterm state_term, Eterm *res_term) -{ - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - - ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); - } - if (state_term != NIL) { - Eterm *ptr = big_val(state_term); - type = ptr[1]; - } - - if (type == am_bm) { - BMData *bm; - Sint pos; - Eterm ret; - Eterm *hp; - BMFindFirstState state; - Uint reds = get_reds(p, BM_LOOP_FACTOR); - Uint save_reds = reds; - - bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_bm_data(bm); -#endif - if (state_term == NIL) { - bm_init_find_first_match(&state, hsstart, hslen); - } else { - Eterm *ptr = big_val(state_term); - memcpy(&state,ptr+2,sizeof(state)); - } -#ifdef HARDDEBUG - erts_printf("(bm) state->pos = %ld, state->len = %lu\n",state.pos, state.len); -#endif - pos = bm_find_first_match(&state, bm, bytes, &reds); - if (pos == BM_NOT_FOUND) { - ret = am_nomatch; - } else if (pos == BM_RESTART) { - int x = (sizeof(BMFindFirstState) / sizeof(Eterm)) + - !!(sizeof(BMFindFirstState) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap bm!\n"); -#endif - hp = HAlloc(p,x+2); - hp[0] = make_pos_bignum_header(x+1); - hp[1] = type; - memcpy(hp+2,&state,sizeof(state)); - *res_term = 
make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - return DO_BIN_MATCH_RESTART; - } else { - Eterm erlen = erts_make_integer((Uint) bm->len, p); - ret = erts_make_integer(pos,p); - hp = HAlloc(p,3); - ret = TUPLE2(hp, ret, erlen); - } - erts_free_aligned_binary_bytes(temp_alloc); - BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; - } else if (type == am_ac) { - ACTrie *act; - Uint pos, rlen; - int acr; - ACFindFirstState state; - Eterm ret; - Eterm *hp; - Uint reds = get_reds(p, AC_LOOP_FACTOR); - Uint save_reds = reds; - - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_ac_trie(act); -#endif - if (state_term == NIL) { - ac_init_find_first_match(&state, act, hsstart, hslen); - } else { - Eterm *ptr = big_val(state_term); - memcpy(&state,ptr+2,sizeof(state)); - } - acr = ac_find_first_match(&state, bytes, &pos, &rlen, &reds); - if (acr == AC_NOT_FOUND) { - ret = am_nomatch; - } else if (acr == AC_RESTART) { - int x = (sizeof(state) / sizeof(Eterm)) + - !!(sizeof(BMFindFirstState) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap ac!\n"); -#endif - hp = HAlloc(p,x+2); - hp[0] = make_pos_bignum_header(x+1); - hp[1] = type; - memcpy(hp+2,&state,sizeof(state)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - return DO_BIN_MATCH_RESTART; - } else { - Eterm epos = erts_make_integer(pos+hsstart,p); - Eterm erlen = erts_make_integer(rlen,p); - hp = HAlloc(p,3); - ret = TUPLE2(hp, epos, erlen); - } - erts_free_aligned_binary_bytes(temp_alloc); - BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; - } - badarg: - return DO_BIN_MATCH_BADARG; -} - -static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, Uint hslen, - Eterm type, Binary *bin, Eterm state_term, Eterm *res_term) -{ - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - - ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); - if 
(bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); - } - if (state_term != NIL) { - Eterm *ptr = big_val(state_term); - type = ptr[1]; - } - - if (type == am_bm) { - BMData *bm; - Sint pos; - Eterm ret,tpl; - Eterm *hp; - BMFindAllState state; - Uint reds = get_reds(p, BM_LOOP_FACTOR); - Uint save_reds = reds; - - bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_bm_data(bm); -#endif - if (state_term == NIL) { - bm_init_find_all(&state, hsstart, hslen); - } else { - Eterm *ptr = big_val(state_term); - bm_restore_find_all(&state,(char *) (ptr+2)); - } - - pos = bm_find_all_non_overlapping(&state, bm, bytes, &reds); - if (pos == BM_NOT_FOUND) { - ret = NIL; - } else if (pos == BM_RESTART) { - int x = (SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + - !!(SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap bm!\n"); -#endif - hp = HAlloc(p,x+2); - hp[0] = make_pos_bignum_header(x+1); - hp[1] = type; - bm_serialize_find_all(&state, (char *) (hp+2)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - bm_clean_find_all(&state); - return DO_BIN_MATCH_RESTART; - } else { - FindallData *fad = state.out; - int i; - for (i = 0; i < state.m; ++i) { - fad[i].epos = erts_make_integer(fad[i].pos,p); - fad[i].elen = erts_make_integer(fad[i].len,p); - } - hp = HAlloc(p,state.m * (3 + 2)); - ret = NIL; - for (i = state.m - 1; i >= 0; --i) { - tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); - hp +=3; - ret = CONS(hp,tpl,ret); - hp += 2; - } - } - erts_free_aligned_binary_bytes(temp_alloc); - bm_clean_find_all(&state); - BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; - } else if (type == am_ac) { - ACTrie *act; - int acr; - ACFindAllState state; - Eterm ret,tpl; - Eterm *hp; - Uint reds = get_reds(p, AC_LOOP_FACTOR); - Uint save_reds = reds; - - act = (ACTrie *) 
ERTS_MAGIC_BIN_DATA(bin); -#ifdef HARDDEBUG - dump_ac_trie(act); -#endif - if (state_term == NIL) { - ac_init_find_all(&state, act, hsstart, hslen); - } else { - Eterm *ptr = big_val(state_term); - ac_restore_find_all(&state,(char *) (ptr+2)); - } - acr = ac_find_all_non_overlapping(&state, bytes, &reds); - if (acr == AC_NOT_FOUND) { - ret = NIL; - } else if (acr == AC_RESTART) { - int x = (SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + - !!(SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); -#ifdef HARDDEBUG - erts_printf("Trap ac!\n"); -#endif - hp = HAlloc(p,x+2); - hp[0] = make_pos_bignum_header(x+1); - hp[1] = type; - ac_serialize_find_all(&state, (char *) (hp+2)); - *res_term = make_big(hp); - erts_free_aligned_binary_bytes(temp_alloc); - ac_clean_find_all(&state); - return DO_BIN_MATCH_RESTART; - } else { - FindallData *fad = state.out; - int i; - for (i = 0; i < state.m; ++i) { - fad[i].epos = erts_make_integer(fad[i].pos,p); - fad[i].elen = erts_make_integer(fad[i].len,p); - } - hp = HAlloc(p,state.m * (3 + 2)); - ret = NIL; - for (i = state.m - 1; i >= 0; --i) { - tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); - hp +=3; - ret = CONS(hp,tpl,ret); - hp += 2; - } - } - erts_free_aligned_binary_bytes(temp_alloc); - ac_clean_find_all(&state); - BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); - *res_term = ret; - return DO_BIN_MATCH_OK; - } - badarg: - return DO_BIN_MATCH_BADARG; -} - -static BIF_RETTYPE binary_match_trap(BIF_ALIST_3) -{ - int runres; - Eterm result; - Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; - runres = do_binary_match(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result); - switch (runres) { - case DO_BIN_MATCH_OK: - BIF_RET(result); - case DO_BIN_MATCH_RESTART: - BUMP_ALL_REDS(BIF_P); - BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3); - default: - goto badarg; - } - badarg: - BIF_ERROR(BIF_P,BADARG); -} - -static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3) -{ - int runres; - Eterm 
result; - Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; - runres = do_binary_matches(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result); - switch (runres) { - case DO_BIN_MATCH_OK: - BIF_RET(result); - case DO_BIN_MATCH_RESTART: - BUMP_ALL_REDS(BIF_P); - BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3); - default: - goto badarg; - } - badarg: - BIF_ERROR(BIF_P,BADARG); -} - - -BIF_RETTYPE binary_match_3(BIF_ALIST_3) -{ - Uint hsstart, hslen; - Eterm *tp; - Eterm type; - Binary *bin; - Eterm bin_term = NIL; - int runres; - Eterm result; - - if (is_not_binary(BIF_ARG_1)) { - goto badarg; - } - if (BIF_ARG_3 == ((Eterm) 0)) { - /* Invalid term, we're called from binary_match_2... */ - hsstart = 0; - hslen = binary_size(BIF_ARG_1); - } else if (is_list(BIF_ARG_3)) { - Eterm l = BIF_ARG_3; - while(is_list(l)) { - Eterm t = CAR(list_val(l)); - if (!is_tuple(t)) { - goto badarg; - } - tp = tuple_val(t); - if (arityval(*tp) != 2) { - goto badarg; - } - if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { - goto badarg; - } - if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { - goto badarg; - } - if (hslen < hsstart) { - goto badarg; - } - if (hslen > binary_size(BIF_ARG_1)-1) { - goto badarg; /* XXX:PaN or should we take as much as we have ? 
*/ - } - hslen = hslen + 1 - hsstart; - l = CDR(list_val(l)); - } - } else if (BIF_ARG_3 != NIL) { - goto badarg; - } - if (hslen == 0) { - BIF_RET(am_nomatch); - } - if (is_tuple(BIF_ARG_2)) { - tp = tuple_val(BIF_ARG_2); - if (arityval(*tp) != 2 || is_not_atom(tp[1])) { - goto badarg; - } - if (((tp[1] != am_bm) && (tp[1] != am_ac)) || - !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - goto badarg; - } - type = tp[1]; - bin = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { - goto badarg; - } - bin_term = tp[2]; - } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { - goto badarg; - } - runres = do_binary_match(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result); - if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { - Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE); - bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); - } else if (bin_term == NIL) { - erts_bin_free(bin); - } - switch (runres) { - case DO_BIN_MATCH_OK: - BIF_RET(result); - case DO_BIN_MATCH_RESTART: - BUMP_ALL_REDS(BIF_P); - BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, bin_term); - default: - goto badarg; - } - badarg: - BIF_ERROR(BIF_P,BADARG); -} - -BIF_RETTYPE binary_matches_3(BIF_ALIST_3) -{ - Uint hsstart, hslen; - Eterm *tp; - Eterm type; - Binary *bin; - Eterm bin_term = NIL; - int runres; - Eterm result; - - if (is_not_binary(BIF_ARG_1)) { - goto badarg; - } - if (BIF_ARG_3 == ((Eterm) 0)) { - /* Invalid term, we're called from binary_matches_2... 
*/ - hsstart = 0; - hslen = binary_size(BIF_ARG_1); - } else if (is_list(BIF_ARG_3)) { - Eterm l = BIF_ARG_3; - while(is_list(l)) { - Eterm t = CAR(list_val(l)); - if (!is_tuple(t)) { - goto badarg; - } - tp = tuple_val(t); - if (arityval(*tp) != 2) { - goto badarg; - } - if (!term_to_Uint(tp[1], &hsstart) || ((hsstart >> 16) >> 16) != 0) { - goto badarg; - } - if (!term_to_Uint(tp[2], &hslen) || ((hslen >> 16) >> 16) != 0) { - goto badarg; - } - if (hslen < hsstart) { - goto badarg; - } - if (hslen > binary_size(BIF_ARG_1)-1) { - goto badarg; /* XXX:PaN or should we take as much as we have ? */ - } - hslen = hslen + 1 - hsstart; - l = CDR(list_val(l)); - } - } else if (BIF_ARG_3 != NIL) { - goto badarg; - } - if (hslen == 0) { - BIF_RET(am_nomatch); - } - if (is_tuple(BIF_ARG_2)) { - tp = tuple_val(BIF_ARG_2); - if (arityval(*tp) != 2 || is_not_atom(tp[1])) { - goto badarg; - } - if (((tp[1] != am_bm) && (tp[1] != am_ac)) || - !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - goto badarg; - } - type = tp[1]; - bin = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { - goto badarg; - } - bin_term = tp[2]; - } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { - goto badarg; - } - runres = do_binary_matches(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result); - if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { - Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE); - bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); - } else if (bin_term == NIL) { - erts_bin_free(bin); - } - switch (runres) { - case DO_BIN_MATCH_OK: - BIF_RET(result); - case DO_BIN_MATCH_RESTART: - BUMP_ALL_REDS(BIF_P); - BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, bin_term); - default: - goto badarg; - } - badarg: - BIF_ERROR(BIF_P,BADARG); -} - - -BIF_RETTYPE binary_match_2(BIF_ALIST_2) -{ - return binary_match_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); -} - - -BIF_RETTYPE binary_matches_2(BIF_ALIST_2) -{ - return 
binary_matches_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); -} - -/* - * Hard debug functions (dump) for the search structures - */ - -#ifdef HARDDEBUG -static void dump_bm_data(BMData *bm) -{ - int i,j; - erts_printf("Dumping Boyer-More structure.\n"); - erts_printf("=============================\n"); - erts_printf("Searchstring [%ld]:\n", bm->len); - erts_printf("<<"); - for (i = 0; i < bm->len; ++i) { - if (i > 0) { - erts_printf(", "); - } - erts_printf("%d", (int) bm->x[i]); - if (bm->x[i] >= 'A') { - erts_printf(" ($%c)",(char) bm->x[i]); - } - } - erts_printf(">>\n"); - erts_printf("GoodShift array:\n"); - for (i = 0; i < bm->len; ++i) { - erts_printf("GoodShift[%d]: %ld\n", i, bm->goodshift[i]); - } - erts_printf("BadShift array:\n"); - j = 0; - for (i = 0; i < ALPHABET_SIZE; i += j) { - for (j = 0; i + j < ALPHABET_SIZE && j < 6; ++j) { - erts_printf("BS[%03d]:%02ld, ", i+j, bm->badshift[i+j]); - } - erts_printf("\n"); - } -} - -static void dump_ac_node(ACNode *node, int indent, int ch) { - int i; - char *spaces = erts_alloc(ERTS_ALC_T_TMP, 10 * indent + 1); - memset(spaces,' ',10*indent); - spaces[10*indent] = '\0'; - erts_printf("%s-> %c\n",spaces,ch); - erts_printf("%sId: %u\n",spaces,(unsigned) node->id); - erts_printf("%sD: %u\n",spaces,(unsigned)node->d); - erts_printf("%sFinal: %d\n",spaces,(int)node->final); - erts_printf("%sFail: %u\n",spaces,(unsigned)node->h->id); - erts_free(ERTS_ALC_T_TMP,spaces); - for(i=0;ig[i] != NULL && node->g[i] != node) { - dump_ac_node(node->g[i],indent+1,i); - } - } -} - - -static void dump_ac_trie(ACTrie *act) -{ - erts_printf("Aho Corasick Trie dump.\n"); - erts_printf("=======================\n"); - erts_printf("Node counter: %u\n", (unsigned) act->idc); - erts_printf("Searchstring counter: %u\n", (unsigned) act->counter); - erts_printf("Trie:\n"); - dump_ac_node(act->root, 0, '0'); - return; -} -#endif diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c new file mode 100644 index 
0000000000..63c82443c5 --- /dev/null +++ b/erts/emulator/beam/erl_bif_binary.c @@ -0,0 +1,1548 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 1996-2010. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +/* + * NOTE: This file contains the BIF's for the *module* binary in stdlib. + * Other BIF's concerning binaries are in binary.c. + */ + + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sys.h" +#include "erl_vm.h" +#include "global.h" +#include "erl_process.h" +#include "error.h" +#include "bif.h" +#include "big.h" +#include "erl_binary.h" +#include "erl_bits.h" + + +/* + * The native implementation functions for the module binary. + * Searching is implemented using either Boyer-Moore or Aho-Corasick + * depending on the number of search strings (BM if one, AC if more than one). + * Native implementation is mostly for efficiency, nothing + * (except binary:referenced_byte_size) really *needs* to be implemented + * in native code.
+ */ + +/* #define HARDDEBUG */ + +/* Init and local variables */ + +static Export binary_match_trap_export; +static BIF_RETTYPE binary_match_trap(BIF_ALIST_3); +static Export binary_matches_trap_export; +static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3); +static Uint max_loop_limit; /* 0 means get_reds() applies no cap */ + + +void erts_init_bif_binary(void) +{ /* Zero each of the two trap Export entries, fill in its code[0..4] slots, point .address at code[3], then reset max_loop_limit to 0. */ + sys_memset((void *) &binary_match_trap_export, 0, sizeof(Export)); + binary_match_trap_export.address = &binary_match_trap_export.code[3]; + binary_match_trap_export.code[0] = am_erlang; + binary_match_trap_export.code[1] = am_binary_match_trap; + binary_match_trap_export.code[2] = 3; + binary_match_trap_export.code[3] = (BeamInstr) em_apply_bif; + binary_match_trap_export.code[4] = (BeamInstr) &binary_match_trap; + + sys_memset((void *) &binary_matches_trap_export, 0, sizeof(Export)); + binary_matches_trap_export.address = &binary_matches_trap_export.code[3]; + binary_matches_trap_export.code[0] = am_erlang; + binary_matches_trap_export.code[1] = am_binary_matches_trap; + binary_matches_trap_export.code[2] = 3; + binary_matches_trap_export.code[3] = (BeamInstr) em_apply_bif; + binary_matches_trap_export.code[4] = (BeamInstr) &binary_matches_trap; + + max_loop_limit = 0; + return; +} + +/* + * Set the loop limit used by searches (a debugging aid). A limit <= 0 stores 0, which get_reds() treats as "no cap". Returns the limit that was stored before the call. + */ +Sint erts_binary_set_loop_limit(Sint limit) +{ + Sint save = (Sint) max_loop_limit; + if (limit <= 0) { + max_loop_limit = 0; + } else { + max_loop_limit = (Uint) limit; + } + return save; +} + +static Uint get_reds(Process *p, int loop_factor) +{ /* Search budget: ERTS_BIF_REDS_LEFT(p) scaled by loop_factor, replaced by max_loop_limit when that is non-zero and smaller. */ + Uint reds = ERTS_BIF_REDS_LEFT(p) * loop_factor; + Uint tmp = max_loop_limit; + if (tmp != 0 && tmp < reds) { + return tmp; + } + return reds; +} + +/* + * A micro allocator used when building search structures, just a convenience + * for building structures inside a pre alocated magic binary using + * conventional malloc-like interface.
+ */ + +#define MYALIGN(Size) (SIZEOF_VOID_P * (((Size) / SIZEOF_VOID_P) + \ + !!(((Size) % SIZEOF_VOID_P)))) + +#ifdef DEBUG +#define CHECK_ALLOCATOR(My) ASSERT((My).current <= ((My).mem + (My).size)) +#else +#define CHECK_ALLOCATOR(My) /* nothing */ +#endif + +typedef struct _my_allocator { + Uint size; /* number of bytes in the managed area */ + byte *current; /* next byte to hand out */ + byte *mem; /* start of the managed area */ +} MyAllocator; + +static void init_my_allocator(MyAllocator *my, Uint siz, byte *array) +{ /* siz must be a multiple of SIZEOF_VOID_P (asserted); allocation starts at the beginning of array. */ + ASSERT((siz % SIZEOF_VOID_P) == 0); + my->size = siz; + my->mem = array; + my->current = my->mem; +} + +static void *my_alloc(MyAllocator *my, Uint size) +{ /* Bump allocation: returns the current pointer and advances it by size rounded up to a multiple of SIZEOF_VOID_P. No bounds check is done here; CHECK_ALLOCATOR asserts the limit in DEBUG builds. */ + void *ptr = my->current; + my->current += MYALIGN(size); + return ptr; +} + +/* + * The search functionality. + * + * The search is byte oriented, which works nicely for UTF-8 as well as + * latin1 data + */ + +#define ALPHABET_SIZE 256 + +typedef struct _ac_node { +#ifdef HARDDEBUG + Uint32 id; /* To identify h pointer targets when + dumping */ +#endif + Uint32 d; /* Depth in trie, also represents the + length (-1) of the matched string if + in final set */ + Sint32 final; /* Members in final set represent + * matches.
+ * The set representation is scattered + * among the nodes in this way: + * >0 -> this represents a member of + * the final set, <0 -> member of + * final set somewhere in the failure + * chain, + * 0 -> not member of the final set */ + struct _ac_node *h; /* h(Node) is the failure function */ + struct _ac_node *g[ALPHABET_SIZE]; /* g(Node,Character) is the + transition function */ +} ACNode; + +typedef struct _ac_trie { +#ifdef HARDDEBUG + Uint32 idc; +#endif + Uint32 counter; /* Number of added patterns */ + ACNode *root; /* pointer to the root state */ +} ACTrie; + +typedef struct _bm_data { + byte *x; + Sint len; + Sint *goodshift; + Sint badshift[ALPHABET_SIZE]; +} BMData; + +#ifdef HARDDEBUG +static void dump_bm_data(BMData *bm); +static void dump_ac_trie(ACTrie *act); +static void dump_ac_node(ACNode *node, int indent, int ch); +#endif + +/* + * The needed size of binary data for a search structure - given the + * accumulated string lengths. + */ +#define BM_SIZE(StrLen) /* StrLen: length of searchstring */ \ +((MYALIGN(sizeof(Sint) * (StrLen))) + /* goodshift array */ \ + MYALIGN(StrLen) + /* searchstring saved */ \ + (MYALIGN(sizeof(BMData)))) /* Structure */ + +#define AC_SIZE(StrLens) /* StrLens: sum of all searchstring lengths */ \ +((MYALIGN(sizeof(ACNode)) * \ +((StrLens)+1)) + /* The actual nodes (including rootnode) */ \ + MYALIGN(sizeof(ACTrie))) /* Structure */ + + +#ifndef MAX +#define MAX(A,B) (((A) > (B)) ? (A) : (B)) /* each use of an argument is parenthesised; the selected argument is evaluated twice, so avoid side effects */ +#endif + +/* + * Callback for the magic binary; the body is empty. + */ +static void cleanup_my_data(Binary *bp) +{ + return; +} + +/* + * Initiate a (allocated) micro allocator and fill in the base + * for an Aho-Corasick search trie, given the accumulated length of the search + * strings.
+ */ +static ACTrie *create_acdata(MyAllocator *my, Uint len, + ACNode ***qbuff /* out */, + Binary **the_bin /* out */) +{ /* Creates a magic binary of AC_SIZE(len) bytes, places the ACTrie and an empty root node in it, and allocates *qbuff (len node pointers) from ERTS_ALC_T_TMP; *qbuff is not freed in this function. */ + Uint datasize = AC_SIZE(len); + ACTrie *act; + ACNode *acn; + Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); + byte *data = ERTS_MAGIC_BIN_DATA(mb); + + init_my_allocator(my, datasize, data); + act = my_alloc(my, sizeof(ACTrie)); /* Important that this is the first + allocation */ + act->counter = 0; + act->root = acn = my_alloc(my, sizeof(ACNode)); + acn->d = 0; + acn->final = 0; + acn->h = NULL; + memset(acn->g, 0, sizeof(ACNode *) * ALPHABET_SIZE); +#ifdef HARDDEBUG + act->idc = 0; + acn->id = 0; +#endif + *qbuff = erts_alloc(ERTS_ALC_T_TMP, sizeof(ACNode *) * len); + *the_bin = mb; + return act; +} + +/* + * The same initialization of allocator and basic data for Boyer-Moore. The pattern bytes are copied into the binary; the shift tables are only allocated here, not filled in. + */ +static BMData *create_bmdata(MyAllocator *my, byte *x, Uint len, + Binary **the_bin /* out */) +{ + Uint datasize = BM_SIZE(len); + BMData *bmd; + Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); + byte *data = ERTS_MAGIC_BIN_DATA(mb); + init_my_allocator(my, datasize, data); + bmd = my_alloc(my, sizeof(BMData)); + bmd->x = my_alloc(my,len); + memcpy(bmd->x,x,len); + bmd->len = len; + bmd->goodshift = my_alloc(my,sizeof(Sint) * len); /* element type and size match the goodshift term of BM_SIZE */ + *the_bin = mb; + return bmd; +} + +/* + * Compilation of search structures + */ + +/* + * Aho Corasick - Build a Trie and fill in the failure functions + * when all strings are added.
+ * The algorithm is nicely described by Dieter Bühler of University of + * Tübingen: + * http://www-sr.informatik.uni-tuebingen.de/~buehler/AC/AC.html + */ + +/* + * Helper called once for each search pattern + */ +static void ac_add_one_pattern(MyAllocator *my, ACTrie *act, byte *x, Uint len) +{ + ACNode *acn = act->root; + Uint32 n = ++act->counter; /* Always increase conter, even if it's a + duplicate as this may identify the pattern + in the final set (not in current interface + though) */ + Uint i = 0; + + while(i < len) { + if (acn->g[x[i]] != NULL) { + /* node exists, continue */ + acn = acn->g[x[i]]; + ++i; + } else { + /* allocate a new node */ + ACNode *nn = my_alloc(my,sizeof(ACNode)); +#ifdef HARDDEBUG + nn->id = ++(act->idc); +#endif + nn->d = i+1; + nn->h = act->root; + nn->final = 0; + memset(nn->g, 0, sizeof(ACNode *) * ALPHABET_SIZE); + acn->g[x[i]] = nn; + ++i; + acn = nn; + } + } + if (acn->final == 0) { /* New pattern, add to final set */ + acn->final = n; + } +} + +/* + * Called when all search patterns are added. + */ +static void ac_compute_failure_functions(ACTrie *act, ACNode **qbuff) +{ + ACNode *root = act->root; + ACNode *parent; + int i; + int qh = 0,qt = 0; + ACNode *child, *r; + + /* Set all children of the root to have the root as failure function */ + for (i = 0; i < ALPHABET_SIZE; ++i) { + if (root->g[i] != NULL) { + root->g[i]->h = root; + /* Add to que for later traversal */ + qbuff[qt++] = root->g[i]; + } + } + + /* So, now we've handled children of the root state, traverse the + rest of the trie BF... 
*/ + while (qh < qt) { + parent = qbuff[qh++]; + for (i = 0; i < ALPHABET_SIZE; ++ i) { + if ((child = parent->g[i]) != NULL) { + /* Visit this node to */ + qbuff[qt++] = child; + /* Search for correct failure function, follow the parents + failure function until you find a similar transition + funtion to this childs */ + r = parent->h; + while (r != NULL && r->g[i] == NULL) { + r = r->h; + } + if (r == NULL) { + /* Replace NULL failures with the root as we go */ + child->h = (root->g[i] == NULL) ? root : root->g[i]; + } else { + child->h = r->g[i]; + /* + * The "final" set is scattered among the nodes. When + * the failure function points to a member of the final + * set, we have a match, but we might not see it in the + * current node if we dont mark it as a special type of + * final, i.e. foolow the failure function and you will + * find a real member of final set. This is marked with + * a negative string id and only done if this node does + * not represent a member in the final set. + */ + if (!(child->final) && (child->h->final)) { + child->final = -1; + } + } + } + } + } + /* Finally the failure function of the root should point to itself */ + root->h = root; +} + + +/* + * The actual searching for needles in the haystack... + * Find first match using Aho-Coracick Trie + * return pattern number and fill in mpos + mlen if found, otherwise return 0 + * Return the matching pattern that *starts* first, and ends + * last (difference when overlapping), hence the candidate thing. + * Basic AC finds the first end before the first start... 
+ * + */ +typedef struct { + ACNode *q; + Uint pos; + Uint len; + ACNode *candidate; + Uint candidate_start; +} ACFindFirstState; + + +static void ac_init_find_first_match(ACFindFirstState *state, ACTrie *act, Sint startpos, Uint len) +{ + state->q = act->root; + state->pos = startpos; + state->len = len; + state->candidate = NULL; + state->candidate_start = 0; +} +#define AC_OK 0 +#define AC_NOT_FOUND -1 +#define AC_RESTART -2 + +#define AC_LOOP_FACTOR 10 + +static int ac_find_first_match(ACFindFirstState *state, byte *haystack, + Uint *mpos, Uint *mlen, Uint *reductions) +{ + ACNode *q = state->q; + Uint i = state->pos; + ACNode *candidate = state->candidate, *r; + Uint len = state->len; + Uint candidate_start = state->candidate_start; + Uint rstart; + register Uint reds = *reductions; + + while (i < len) { + if (--reds == 0) { + state->q = q; + state->pos = i; + state->len = len; + state->candidate = candidate; + state->candidate_start = candidate_start; + return AC_RESTART; + } + + while (q->g[haystack[i]] == NULL && q->h != q) { + q = q->h; + } + if (q->g[haystack[i]] != NULL) { + q = q->g[haystack[i]]; + } +#ifdef HARDDEBUG + erts_printf("ch = %c, Current: %u\n", (int) haystack[i], (unsigned) q->id); +#endif + ++i; + if (candidate != NULL && (i - q->d) > candidate_start) { + break; + } + if (q->final) { + r = q; + while (r->final < 0) + r = r->h; + rstart = i - r->d; + if (candidate == NULL || rstart < candidate_start || + (rstart == candidate_start && candidate->d < q->d)) { + candidate_start = rstart; + candidate = r; + } + } + } + *reductions = reds; + if (!candidate) { + return AC_NOT_FOUND; + } +#ifdef HARDDEBUG + dump_ac_node(candidate,0,'?'); +#endif + *mpos = candidate_start; + *mlen = candidate->d; + return AC_OK; +} + +typedef struct _findall_data { + Uint pos; + Uint len; +#ifdef HARDDEBUG + Uint id; +#endif + Eterm epos; + Eterm elen; +} FindallData; + +typedef struct { + ACNode *q; + Uint pos; + Uint len; + Uint m; + Uint allocated; + FindallData 
*out; +} ACFindAllState; + +static void ac_init_find_all(ACFindAllState *state, ACTrie *act, Sint startpos, Uint len) +{ + state->q = act->root; + state->pos = startpos; + state->len = len; + state->m = 0; + state->allocated = 0; + state->out = NULL; +} + +static void ac_restore_find_all(ACFindAllState *state, char *buff) +{ + memcpy(state,buff,sizeof(ACFindAllState)); + state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * (state->allocated)); + memcpy(state->out,buff+sizeof(ACFindAllState),sizeof(FindallData)*state->m); +} + +static void ac_serialize_find_all(ACFindAllState *state, char *buff) +{ + memcpy(buff,state,sizeof(ACFindAllState)); + memcpy(buff+sizeof(ACFindAllState),state->out,sizeof(FindallData)*state->m); +} + +static void ac_clean_find_all(ACFindAllState *state) +{ + if (state->out != NULL) { + erts_free(ERTS_ALC_T_TMP, state->out); + } +#ifdef HARDDEBUG + state->out = NULL; + state->allocated = 0; +#endif +} + +#define SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(S) \ + (sizeof(ACFindAllState)+(sizeof(FindallData)*(S).m)) + +/* + * Differs to the find_first function in that it stores all matches and the values + * arte returned only in the state. 
+ */ +static int ac_find_all_non_overlapping(ACFindAllState *state, byte *haystack, + Uint *reductions) +{ + ACNode *q = state->q; + Uint i = state->pos; + Uint rstart; + ACNode *r; + Uint len = state->len; + Uint m = state->m, save_m; + Uint allocated = state->allocated; + FindallData *out = state->out; + register Uint reds = *reductions; + + + while (i < len) { + if (--reds == 0) { + state->q = q; + state->pos = i; + state->len = len; + state->m = m; + state->allocated = allocated; + state->out = out; + return AC_RESTART; + } + while (q->g[haystack[i]] == NULL && q->h != q) { + q = q->h; + } + if (q->g[haystack[i]] != NULL) { + q = q->g[haystack[i]]; + } + ++i; + if (q->final) { + r = q; + while (r->final) { + while (r->final < 0) + r = r->h; +#ifdef HARDDEBUG + erts_printf("Trying to add %u\n",(unsigned) r->final); +#endif + rstart = i - r->d; + save_m = m; + while (m > 0 && (out[m-1].pos > rstart || + (out[m-1].pos == rstart && + out[m-1].len < r->d))) { +#ifdef HARDDEBUG + erts_printf("Popping %u\n",(unsigned) out[m-1].id); +#endif + --m; + } +#ifdef HARDDEBUG + if (m > 0) { + erts_printf("Pos %u\n",out[m-1].pos); + erts_printf("Len %u\n",out[m-1].len); + } + erts_printf("Rstart %u\n",rstart); +#endif + if (m == 0 || out[m-1].pos + out[m-1].len <= rstart) { + if (m >= allocated) { + if (!allocated) { + allocated = 10; + out = erts_alloc(ERTS_ALC_T_TMP, + sizeof(FindallData) * allocated); + } else { + allocated *= 2; + out = erts_realloc(ERTS_ALC_T_TMP, out, + sizeof(FindallData) * + allocated); + } + } + out[m].pos = rstart; + out[m].len = r->d; +#ifdef HARDDEBUG + out[m].id = r->final; +#endif + ++m; +#ifdef HARDDEBUG + erts_printf("Pushing %u\n",(unsigned) out[m-1].id); +#endif + } else { +#ifdef HARDDEBUG + erts_printf("Backtracking %d steps\n",save_m - m); +#endif + m = save_m; + } + r = r->h; + } + } + } + *reductions = reds; + state->m = m; + state->out = out; + return (m == 0) ? 
AC_NOT_FOUND : AC_OK; +} + +/* + * Boyer More - most obviously implemented more or less exactly as + * Christian Charras and Thierry Lecroq describes it in "Handbook of + * Exact String-Matching Algorithms" + * http://www-igm.univ-mlv.fr/~lecroq/string/ + */ + +/* + * Call this to compute badshifts array + */ +static void compute_badshifts(BMData *bmd) +{ + Sint i; + Sint m = bmd->len; + + for (i = 0; i < ALPHABET_SIZE; ++i) { + bmd->badshift[i] = m; + } + for (i = 0; i < m - 1; ++i) { + bmd->badshift[bmd->x[i]] = m - i - 1; + } +} + +/* Helper for "compute_goodshifts" */ +static void compute_suffixes(byte *x, Sint m, Sint *suffixes) +{ + int f,g,i; + + suffixes[m - 1] = m; + + f = 0; /* To avoid use before set warning */ + + g = m - 1; + + for (i = m - 2; i >= 0; --i) { + if (i > g && suffixes[i + m - f] < i - g) { + suffixes[i] = suffixes[i + m - 1 - f]; + } else { + if (i < g) { + g = i; + } + f = i; + while ( g >= 0 && x[g] == x[g + m - 1 - f] ) { + --g; + } + suffixes[i] = f - g; + } + } +} + +/* + * Call this to compute goodshift array + */ +static void compute_goodshifts(BMData *bmd) +{ + Sint m = bmd->len; + byte *x = bmd->x; + Sint i, j; + Sint *suffixes = erts_alloc(ERTS_ALC_T_TMP, m * sizeof(Uint)); + + compute_suffixes(x, m, suffixes); + + for (i = 0; i < m; ++i) { + bmd->goodshift[i] = m; + } + + j = 0; + + for (i = m - 1; i >= -1; --i) { + if (i == -1 || suffixes[i] == i + 1) { + while (j < m - 1 - i) { + if (bmd->goodshift[j] == m) { + bmd->goodshift[j] = m - 1 - i; + } + ++j; + } + } + } + for (i = 0; i <= m - 2; ++i) { + bmd->goodshift[m - 1 - suffixes[i]] = m - 1 - i; + } + erts_free(ERTS_ALC_T_TMP, suffixes); +} + +typedef struct { + Sint pos; + Sint len; +} BMFindFirstState; + +#define BM_OK 0 /* used only for find_all */ +#define BM_NOT_FOUND -1 +#define BM_RESTART -2 +#define BM_LOOP_FACTOR 10 /* Should we have a higher value? 
*/ + +static void bm_init_find_first_match(BMFindFirstState *state, Sint startpos, + Uint len) +{ + state->pos = startpos; + state->len = (Sint) len; +} + + +static Sint bm_find_first_match(BMFindFirstState *state, BMData *bmd, + byte *haystack, Uint *reductions) +{ + Sint blen = bmd->len; + Sint len = state->len; + Sint *gs = bmd->goodshift; + Sint *bs = bmd->badshift; + byte *needle = bmd->x; + Sint i; + Sint j = state->pos; + register Uint reds = *reductions; + + while (j <= len - blen) { + if (--reds == 0) { + state->pos = j; + return BM_RESTART; + } + for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) + ; + if (i < 0) { /* found */ + *reductions = reds; + return j; + } + j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); + } + *reductions = reds; + return BM_NOT_FOUND; +} + +typedef struct { + Sint pos; + Sint len; + Uint m; + Uint allocated; + FindallData *out; +} BMFindAllState; + +static void bm_init_find_all(BMFindAllState *state, Sint startpos, Uint len) +{ + state->pos = startpos; + state->len = (Sint) len; + state->m = 0; + state->allocated = 0; + state->out = NULL; +} + +static void bm_restore_find_all(BMFindAllState *state, char *buff) +{ + memcpy(state,buff,sizeof(BMFindAllState)); + state->out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * + (state->allocated)); + memcpy(state->out,buff+sizeof(BMFindAllState), + sizeof(FindallData)*state->m); +} + +static void bm_serialize_find_all(BMFindAllState *state, char *buff) +{ + memcpy(buff,state,sizeof(BMFindAllState)); + memcpy(buff+sizeof(BMFindAllState),state->out, + sizeof(FindallData)*state->m); +} + +static void bm_clean_find_all(BMFindAllState *state) +{ + if (state->out != NULL) { + erts_free(ERTS_ALC_T_TMP, state->out); + } +#ifdef HARDDEBUG + state->out = NULL; + state->allocated = 0; +#endif +} + +#define SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(S) \ + (sizeof(BMFindAllState)+(sizeof(FindallData)*(S).m)) + +/* + * Differs to the find_first function in that it stores all matches and 
the + * values are returned only in the state. + */ +static Sint bm_find_all_non_overlapping(BMFindAllState *state, + BMData *bmd, byte *haystack, + Uint *reductions) +{ + Sint blen = bmd->len; + Sint len = state->len; + Sint *gs = bmd->goodshift; + Sint *bs = bmd->badshift; + byte *needle = bmd->x; + Sint i; + Sint j = state->pos; + Uint m = state->m; + Uint allocated = state->allocated; + FindallData *out = state->out; + register Uint reds = *reductions; + + while (j <= len - blen) { + if (--reds == 0) { + state->pos = j; + state->m = m; + state->allocated = allocated; + state->out = out; + return BM_RESTART; + } + for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i) + ; + if (i < 0) { /* found */ + if (m >= allocated) { + if (!allocated) { + allocated = 10; + out = erts_alloc(ERTS_ALC_T_TMP, sizeof(FindallData) * allocated); + } else { + allocated *= 2; + out = erts_realloc(ERTS_ALC_T_TMP, out, + sizeof(FindallData) * allocated); + } + } + out[m].pos = j; + out[m].len = blen; + ++m; + j += blen; + } else { + j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i); + } + } + state->m = m; + state->out = out; + *reductions = reds; + return (m == 0) ? BM_NOT_FOUND : BM_OK; +} + +/* + * Interface functions (i.e. 
"bif's") + */ + +/* + * Search functionality interfaces + */ + +static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp) +{ + Eterm t, b, comp_term = NIL; + Uint characters; + Uint words; + + characters = 0; + words = 0; + + if (is_list(argument)) { + t = argument; + while (is_list(t)) { + b = CAR(list_val(t)); + t = CDR(list_val(t)); + if (!is_binary(b)) { + goto badarg; + } + if (binary_bitsize(b) != 0) { + goto badarg; + } + ++words; + characters += binary_size(b); + } + if (is_not_nil(t)) { + goto badarg; + } + if (words > 1) { + comp_term = argument; + } else { + comp_term = CAR(list_val(argument)); + } + } else if (is_binary(argument)) { + if (binary_bitsize(argument) != 0) { + goto badarg; + } + words = 1; + comp_term = argument; + characters = binary_size(argument); + } + + if (characters == 0) { + goto badarg; + } + ASSERT(words > 0); + + if (words == 1) { + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + MyAllocator my; + BMData *bmd; + Binary *bin; + + ERTS_GET_BINARY_BYTES(comp_term, bytes, bitoffs, bitsize); + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(comp_term, &temp_alloc); + } + bmd = create_bmdata(&my, bytes, characters, &bin); + compute_badshifts(bmd); + compute_goodshifts(bmd); + erts_free_aligned_binary_bytes(temp_alloc); + CHECK_ALLOCATOR(my); + *tag = am_bm; + *binp = bin; + return 0; + } else { + ACTrie *act; + MyAllocator my; + ACNode **qbuff; + Binary *bin; + + act = create_acdata(&my, characters, &qbuff, &bin); + t = comp_term; + while (is_list(t)) { + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + b = CAR(list_val(t)); + t = CDR(list_val(t)); + ERTS_GET_BINARY_BYTES(b, bytes, bitoffs, bitsize); + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(b, &temp_alloc); + } + ac_add_one_pattern(&my,act,bytes,binary_size(b)); + erts_free_aligned_binary_bytes(temp_alloc); + } + ac_compute_failure_functions(act,qbuff); + CHECK_ALLOCATOR(my); + 
erts_free(ERTS_ALC_T_TMP,qbuff); + *tag = am_ac; + *binp = bin; + return 0; + } + badarg: + return -1; +} + +BIF_RETTYPE binary_compile_pattern_1(BIF_ALIST_1) +{ + Binary *bin; + Eterm tag, ret; + Eterm *hp; + + if (do_binary_match_compile(BIF_ARG_1,&tag,&bin)) { + BIF_ERROR(BIF_P,BADARG); + } + hp = HAlloc(BIF_P, PROC_BIN_SIZE+3); + ret = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); + ret = TUPLE2(hp, tag, ret); + BIF_RET(ret); +} + +#define DO_BIN_MATCH_OK 0 +#define DO_BIN_MATCH_BADARG -1 +#define DO_BIN_MATCH_RESTART -2 + +static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, + Eterm type, Binary *bin, Eterm state_term, + Eterm *res_term) +{ + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + + ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); + } + if (state_term != NIL) { + Eterm *ptr = big_val(state_term); + type = ptr[1]; + } + + if (type == am_bm) { + BMData *bm; + Sint pos; + Eterm ret; + Eterm *hp; + BMFindFirstState state; + Uint reds = get_reds(p, BM_LOOP_FACTOR); + Uint save_reds = reds; + + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_bm_data(bm); +#endif + if (state_term == NIL) { + bm_init_find_first_match(&state, hsstart, hslen); + } else { + Eterm *ptr = big_val(state_term); + memcpy(&state,ptr+2,sizeof(state)); + } +#ifdef HARDDEBUG + erts_printf("(bm) state->pos = %ld, state->len = %lu\n",state.pos, + state.len); +#endif + pos = bm_find_first_match(&state, bm, bytes, &reds); + if (pos == BM_NOT_FOUND) { + ret = am_nomatch; + } else if (pos == BM_RESTART) { + int x = (sizeof(BMFindFirstState) / sizeof(Eterm)) + + !!(sizeof(BMFindFirstState) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap bm!\n"); +#endif + hp = HAlloc(p,x+2); + hp[0] = make_pos_bignum_header(x+1); + hp[1] = type; + memcpy(hp+2,&state,sizeof(state)); + *res_term = 
make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + return DO_BIN_MATCH_RESTART; + } else { + Eterm erlen = erts_make_integer((Uint) bm->len, p); + ret = erts_make_integer(pos,p); + hp = HAlloc(p,3); + ret = TUPLE2(hp, ret, erlen); + } + erts_free_aligned_binary_bytes(temp_alloc); + BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); + *res_term = ret; + return DO_BIN_MATCH_OK; + } else if (type == am_ac) { + ACTrie *act; + Uint pos, rlen; + int acr; + ACFindFirstState state; + Eterm ret; + Eterm *hp; + Uint reds = get_reds(p, AC_LOOP_FACTOR); + Uint save_reds = reds; + + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_ac_trie(act); +#endif + if (state_term == NIL) { + ac_init_find_first_match(&state, act, hsstart, hslen); + } else { + Eterm *ptr = big_val(state_term); + memcpy(&state,ptr+2,sizeof(state)); + } + acr = ac_find_first_match(&state, bytes, &pos, &rlen, &reds); + if (acr == AC_NOT_FOUND) { + ret = am_nomatch; + } else if (acr == AC_RESTART) { + int x = (sizeof(state) / sizeof(Eterm)) + + !!(sizeof(BMFindFirstState) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap ac!\n"); +#endif + hp = HAlloc(p,x+2); + hp[0] = make_pos_bignum_header(x+1); + hp[1] = type; + memcpy(hp+2,&state,sizeof(state)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + return DO_BIN_MATCH_RESTART; + } else { + Eterm epos = erts_make_integer(pos+hsstart,p); + Eterm erlen = erts_make_integer(rlen,p); + hp = HAlloc(p,3); + ret = TUPLE2(hp, epos, erlen); + } + erts_free_aligned_binary_bytes(temp_alloc); + BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); + *res_term = ret; + return DO_BIN_MATCH_OK; + } + badarg: + return DO_BIN_MATCH_BADARG; +} + +static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, + Uint hslen, Eterm type, Binary *bin, + Eterm state_term, Eterm *res_term) +{ + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + + ERTS_GET_BINARY_BYTES(subject, bytes, bitoffs, bitsize); + if 
(bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(subject, &temp_alloc); + } + if (state_term != NIL) { + Eterm *ptr = big_val(state_term); + type = ptr[1]; + } + + if (type == am_bm) { + BMData *bm; + Sint pos; + Eterm ret,tpl; + Eterm *hp; + BMFindAllState state; + Uint reds = get_reds(p, BM_LOOP_FACTOR); + Uint save_reds = reds; + + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_bm_data(bm); +#endif + if (state_term == NIL) { + bm_init_find_all(&state, hsstart, hslen); + } else { + Eterm *ptr = big_val(state_term); + bm_restore_find_all(&state,(char *) (ptr+2)); + } + + pos = bm_find_all_non_overlapping(&state, bm, bytes, &reds); + if (pos == BM_NOT_FOUND) { + ret = NIL; + } else if (pos == BM_RESTART) { + int x = + (SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + + !!(SIZEOF_BM_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap bm!\n"); +#endif + hp = HAlloc(p,x+2); + hp[0] = make_pos_bignum_header(x+1); + hp[1] = type; + bm_serialize_find_all(&state, (char *) (hp+2)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + bm_clean_find_all(&state); + return DO_BIN_MATCH_RESTART; + } else { + FindallData *fad = state.out; + int i; + for (i = 0; i < state.m; ++i) { + fad[i].epos = erts_make_integer(fad[i].pos,p); + fad[i].elen = erts_make_integer(fad[i].len,p); + } + hp = HAlloc(p,state.m * (3 + 2)); + ret = NIL; + for (i = state.m - 1; i >= 0; --i) { + tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); + hp +=3; + ret = CONS(hp,tpl,ret); + hp += 2; + } + } + erts_free_aligned_binary_bytes(temp_alloc); + bm_clean_find_all(&state); + BUMP_REDS(p, (save_reds - reds) / BM_LOOP_FACTOR); + *res_term = ret; + return DO_BIN_MATCH_OK; + } else if (type == am_ac) { + ACTrie *act; + int acr; + ACFindAllState state; + Eterm ret,tpl; + Eterm *hp; + Uint reds = get_reds(p, AC_LOOP_FACTOR); + Uint save_reds = reds; + + act = (ACTrie *) 
ERTS_MAGIC_BIN_DATA(bin); +#ifdef HARDDEBUG + dump_ac_trie(act); +#endif + if (state_term == NIL) { + ac_init_find_all(&state, act, hsstart, hslen); + } else { + Eterm *ptr = big_val(state_term); + ac_restore_find_all(&state,(char *) (ptr+2)); + } + acr = ac_find_all_non_overlapping(&state, bytes, &reds); + if (acr == AC_NOT_FOUND) { + ret = NIL; + } else if (acr == AC_RESTART) { + int x = + (SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) / sizeof(Eterm)) + + !!(SIZEOF_AC_SERIALIZED_FIND_ALL_STATE(state) % sizeof(Eterm)); +#ifdef HARDDEBUG + erts_printf("Trap ac!\n"); +#endif + hp = HAlloc(p,x+2); + hp[0] = make_pos_bignum_header(x+1); + hp[1] = type; + ac_serialize_find_all(&state, (char *) (hp+2)); + *res_term = make_big(hp); + erts_free_aligned_binary_bytes(temp_alloc); + ac_clean_find_all(&state); + return DO_BIN_MATCH_RESTART; + } else { + FindallData *fad = state.out; + int i; + for (i = 0; i < state.m; ++i) { + fad[i].epos = erts_make_integer(fad[i].pos,p); + fad[i].elen = erts_make_integer(fad[i].len,p); + } + hp = HAlloc(p,state.m * (3 + 2)); + ret = NIL; + for (i = state.m - 1; i >= 0; --i) { + tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); + hp +=3; + ret = CONS(hp,tpl,ret); + hp += 2; + } + } + erts_free_aligned_binary_bytes(temp_alloc); + ac_clean_find_all(&state); + BUMP_REDS(p, (save_reds - reds) / AC_LOOP_FACTOR); + *res_term = ret; + return DO_BIN_MATCH_OK; + } + badarg: + return DO_BIN_MATCH_BADARG; +} + +static BIF_RETTYPE binary_match_trap(BIF_ALIST_3) +{ + int runres; + Eterm result; + Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; + runres = do_binary_match(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result); + switch (runres) { + case DO_BIN_MATCH_OK: + BIF_RET(result); + case DO_BIN_MATCH_RESTART: + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, + BIF_ARG_3); + default: + goto badarg; + } + badarg: + BIF_ERROR(BIF_P,BADARG); +} + +static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3) +{ + int runres; + 
Eterm result; + Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; + runres = do_binary_matches(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result); + switch (runres) { + case DO_BIN_MATCH_OK: + BIF_RET(result); + case DO_BIN_MATCH_RESTART: + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, + BIF_ARG_3); + default: + goto badarg; + } + badarg: + BIF_ERROR(BIF_P,BADARG); +} + + +BIF_RETTYPE binary_match_3(BIF_ALIST_3) +{ + Uint hsstart, hslen; + Eterm *tp; + Eterm type; + Binary *bin; + Eterm bin_term = NIL; + int runres; + Eterm result; + + if (is_not_binary(BIF_ARG_1)) { + goto badarg; + } + if (BIF_ARG_3 == ((Eterm) 0)) { + /* Invalid term, we're called from binary_match_2... */ + hsstart = 0; + hslen = binary_size(BIF_ARG_1); + } else if (is_list(BIF_ARG_3)) { + Eterm l = BIF_ARG_3; + while(is_list(l)) { + Eterm t = CAR(list_val(l)); + if (!is_tuple(t)) { + goto badarg; + } + tp = tuple_val(t); + if (arityval(*tp) != 2) { + goto badarg; + } + if (!term_to_Uint(tp[1], &hsstart) || + ((hsstart >> 16) >> 15) != 0) { + goto badarg; + } + if (!term_to_Uint(tp[2], &hslen) || + ((hslen >> 16) >> 15) != 0) { + goto badarg; + } + if (hslen < hsstart) { + goto badarg; + } + if (hslen > binary_size(BIF_ARG_1)-1) { + goto badarg; /* XXX:PaN or should we take as much as we have ? 
*/ + } + hslen = hslen + 1 - hsstart; + l = CDR(list_val(l)); + } + } else if (BIF_ARG_3 != NIL) { + goto badarg; + } + if (hslen == 0) { + BIF_RET(am_nomatch); + } + if (is_tuple(BIF_ARG_2)) { + tp = tuple_val(BIF_ARG_2); + if (arityval(*tp) != 2 || is_not_atom(tp[1])) { + goto badarg; + } + if (((tp[1] != am_bm) && (tp[1] != am_ac)) || + !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + goto badarg; + } + type = tp[1]; + bin = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { + goto badarg; + } + bin_term = tp[2]; + } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { + goto badarg; + } + runres = do_binary_match(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result); + if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { + Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE); + bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); + } else if (bin_term == NIL) { + erts_bin_free(bin); + } + switch (runres) { + case DO_BIN_MATCH_OK: + BIF_RET(result); + case DO_BIN_MATCH_RESTART: + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, bin_term); + default: + goto badarg; + } + badarg: + BIF_ERROR(BIF_P,BADARG); +} + +BIF_RETTYPE binary_matches_3(BIF_ALIST_3) +{ + Uint hsstart, hslen; + Eterm *tp; + Eterm type; + Binary *bin; + Eterm bin_term = NIL; + int runres; + Eterm result; + + if (is_not_binary(BIF_ARG_1)) { + goto badarg; + } + if (BIF_ARG_3 == ((Eterm) 0)) { + /* Invalid term, we're called from binary_matches_2... 
*/ + hsstart = 0; + hslen = binary_size(BIF_ARG_1); + } else if (is_list(BIF_ARG_3)) { + Eterm l = BIF_ARG_3; + while(is_list(l)) { + Eterm t = CAR(list_val(l)); + if (!is_tuple(t)) { + goto badarg; + } + tp = tuple_val(t); + if (arityval(*tp) != 2) { + goto badarg; + } + if (!term_to_Uint(tp[1], &hsstart) || + ((hsstart >> 16) >> 15) != 0) { + goto badarg; + } + if (!term_to_Uint(tp[2], &hslen) || + ((hslen >> 16) >> 15) != 0) { + goto badarg; + } + if (hslen < hsstart) { + goto badarg; + } + if (hslen > binary_size(BIF_ARG_1)-1) { + goto badarg; /* XXX:PaN or should we take as much as we + have ? */ + } + hslen = hslen + 1 - hsstart; + l = CDR(list_val(l)); + } + } else if (BIF_ARG_3 != NIL) { + goto badarg; + } + if (hslen == 0) { + BIF_RET(am_nomatch); + } + if (is_tuple(BIF_ARG_2)) { + tp = tuple_val(BIF_ARG_2); + if (arityval(*tp) != 2 || is_not_atom(tp[1])) { + goto badarg; + } + if (((tp[1] != am_bm) && (tp[1] != am_ac)) || + !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + goto badarg; + } + type = tp[1]; + bin = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { + goto badarg; + } + bin_term = tp[2]; + } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { + goto badarg; + } + runres = do_binary_matches(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin, + NIL,&result); + if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { + Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE); + bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); + } else if (bin_term == NIL) { + erts_bin_free(bin); + } + switch (runres) { + case DO_BIN_MATCH_OK: + BIF_RET(result); + case DO_BIN_MATCH_RESTART: + BUMP_ALL_REDS(BIF_P); + BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, + bin_term); + default: + goto badarg; + } + badarg: + BIF_ERROR(BIF_P,BADARG); +} + + +BIF_RETTYPE binary_match_2(BIF_ALIST_2) +{ + return binary_match_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); +} + + +BIF_RETTYPE binary_matches_2(BIF_ALIST_2) +{ + return 
binary_matches_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); +} + +/* + * Hard debug functions (dump) for the search structures + */ + +#ifdef HARDDEBUG +static void dump_bm_data(BMData *bm) +{ + int i,j; + erts_printf("Dumping Boyer-More structure.\n"); + erts_printf("=============================\n"); + erts_printf("Searchstring [%ld]:\n", bm->len); + erts_printf("<<"); + for (i = 0; i < bm->len; ++i) { + if (i > 0) { + erts_printf(", "); + } + erts_printf("%d", (int) bm->x[i]); + if (bm->x[i] >= 'A') { + erts_printf(" ($%c)",(char) bm->x[i]); + } + } + erts_printf(">>\n"); + erts_printf("GoodShift array:\n"); + for (i = 0; i < bm->len; ++i) { + erts_printf("GoodShift[%d]: %ld\n", i, bm->goodshift[i]); + } + erts_printf("BadShift array:\n"); + j = 0; + for (i = 0; i < ALPHABET_SIZE; i += j) { + for (j = 0; i + j < ALPHABET_SIZE && j < 6; ++j) { + erts_printf("BS[%03d]:%02ld, ", i+j, bm->badshift[i+j]); + } + erts_printf("\n"); + } +} + +static void dump_ac_node(ACNode *node, int indent, int ch) { + int i; + char *spaces = erts_alloc(ERTS_ALC_T_TMP, 10 * indent + 1); + memset(spaces,' ',10*indent); + spaces[10*indent] = '\0'; + erts_printf("%s-> %c\n",spaces,ch); + erts_printf("%sId: %u\n",spaces,(unsigned) node->id); + erts_printf("%sD: %u\n",spaces,(unsigned)node->d); + erts_printf("%sFinal: %d\n",spaces,(int)node->final); + erts_printf("%sFail: %u\n",spaces,(unsigned)node->h->id); + erts_free(ERTS_ALC_T_TMP,spaces); + for(i=0;ig[i] != NULL && node->g[i] != node) { + dump_ac_node(node->g[i],indent+1,i); + } + } +} + + +static void dump_ac_trie(ACTrie *act) +{ + erts_printf("Aho Corasick Trie dump.\n"); + erts_printf("=======================\n"); + erts_printf("Node counter: %u\n", (unsigned) act->idc); + erts_printf("Searchstring counter: %u\n", (unsigned) act->counter); + erts_printf("Trie:\n"); + dump_ac_node(act->root, 0, '0'); + return; +} +#endif -- cgit v1.2.3 From a6c89679cd6006b3e9839b426159fd4302321528 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 
22 Apr 2010 18:45:36 +0200 Subject: Add binary:part to erl_bif_binary.c Change name of the 'scope' option for binary:match/matches. Add split and replace to binary.erl. Cleanup comments etc in binary.erl and atom.names Add testcases for part, split, replace and scopes. --- erts/emulator/beam/atom.names | 1 + erts/emulator/beam/bif.tab | 38 +++--- erts/emulator/beam/erl_bif_binary.c | 228 +++++++++++++++++++++++------------- 3 files changed, 166 insertions(+), 101 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 4e3284a4cd..06ff0e11a4 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -458,6 +458,7 @@ atom scheduler atom scheduler_id atom schedulers_online atom scheme +atom scope atom sensitive atom sequential_tracer atom sequential_trace_token diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 3f51e6dc45..85674664e4 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -765,28 +765,22 @@ bif binary:match/2 bif binary:match/3 bif binary:matches/2 bif binary:matches/3 -# bif binary:split/2 -# bif binary:split/3 -# bif binary:substitute/3 -# bif binary:globally_substitute/3 -# bif binary:duplicate/2 - -# -# XXX:PaN Usecase for these two? Creeping Biffilism? -# -# bif binary:from_unsigned/1 -# bif binary:to_unsigned/1 - -# -# XXX:PaN The following are suggested to be implemented in the erlang code... -# - or are they meant to be guard bif's? 
-# -# binary:first/1 -# binary:first/2 -# binary:last/1 -# binary:last/2 -# binary:nth/2 -# binary:extract/3 +# bif binary:longest_common_prefix/1 +# bif binary:longest_common_suffix/1 +# bif binary:first/1 +# bif binary:last/1 +# bif binary:at/2 +bif binary:part/2 +bif binary:part/3 +# bif binary:bin_to_list/1 +# bif binary:bin_to_list/2 +# bif binary:bin_to_list/3 +# bif binary:list_to_bin/1 +# bif binary:copy/1 +# bif binary:copy/2 +# bif binary:referenced_byte_size/1 +# bif binary:decode_unsigned/1 +# bif binary:decode_unsigned/2 # # New Bifs in R13B4 diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index 63c82443c5..a635280ac1 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -984,7 +984,7 @@ BIF_RETTYPE binary_compile_pattern_1(BIF_ALIST_1) #define DO_BIN_MATCH_BADARG -1 #define DO_BIN_MATCH_RESTART -2 -static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, +static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hsend, Eterm type, Binary *bin, Eterm state_term, Eterm *res_term) { @@ -1018,7 +1018,7 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, dump_bm_data(bm); #endif if (state_term == NIL) { - bm_init_find_first_match(&state, hsstart, hslen); + bm_init_find_first_match(&state, hsstart, hsend); } else { Eterm *ptr = big_val(state_term); memcpy(&state,ptr+2,sizeof(state)); @@ -1068,7 +1068,7 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, dump_ac_trie(act); #endif if (state_term == NIL) { - ac_init_find_first_match(&state, act, hsstart, hslen); + ac_init_find_first_match(&state, act, hsstart, hsend); } else { Eterm *ptr = big_val(state_term); memcpy(&state,ptr+2,sizeof(state)); @@ -1105,7 +1105,7 @@ static int do_binary_match(Process *p, Eterm subject, Uint hsstart, Uint hslen, } static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, - Uint hslen, 
Eterm type, Binary *bin, + Uint hsend, Eterm type, Binary *bin, Eterm state_term, Eterm *res_term) { byte *bytes; @@ -1138,7 +1138,7 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, dump_bm_data(bm); #endif if (state_term == NIL) { - bm_init_find_all(&state, hsstart, hslen); + bm_init_find_all(&state, hsstart, hsend); } else { Eterm *ptr = big_val(state_term); bm_restore_find_all(&state,(char *) (ptr+2)); @@ -1197,7 +1197,7 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, dump_ac_trie(act); #endif if (state_term == NIL) { - ac_init_find_all(&state, act, hsstart, hslen); + ac_init_find_all(&state, act, hsstart, hsend); } else { Eterm *ptr = big_val(state_term); ac_restore_find_all(&state,(char *) (ptr+2)); @@ -1246,6 +1246,67 @@ static int do_binary_matches(Process *p, Eterm subject, Uint hsstart, return DO_BIN_MATCH_BADARG; } +static int parse_match_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp) +{ + Eterm *tp; + Uint pos; + Sint len; + if (l == ((Eterm) 0) || l == NIL) { + /* Invalid term or NIL, we're called from binary_match(es)_2 or + have no options*/ + *posp = 0; + *endp = binary_size(bin); + return 0; + } else if (is_list(l)) { + while(is_list(l)) { + Eterm t = CAR(list_val(l)); + Uint orig_size; + if (!is_tuple(t)) { + goto badarg; + } + tp = tuple_val(t); + if (arityval(*tp) != 2) { + goto badarg; + } + if (tp[1] != am_scope || is_not_tuple(tp[2])) { + goto badarg; + } + tp = tuple_val(tp[2]); + if (arityval(*tp) != 2) { + goto badarg; + } + if (!term_to_Uint(tp[1], &pos)) { + goto badarg; + } + if (!term_to_Sint(tp[2], &len)) { + goto badarg; + } + if (len < 0) { + Sint lentmp = -len; + if (-lentmp != len) { + goto badarg; + } + len = lentmp; + pos -= len; + } + if (((pos + len) - len) != pos) { + goto badarg; + } + *endp = len + pos; + *posp = pos; + if ((orig_size = binary_size(bin)) < pos || + orig_size < (*endp)) { + goto badarg; + } + l = CDR(list_val(l)); + } + return 0; + } else { + badarg: + 
return 1; + } +} + static BIF_RETTYPE binary_match_trap(BIF_ALIST_3) { int runres; @@ -1289,7 +1350,8 @@ static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3) BIF_RETTYPE binary_match_3(BIF_ALIST_3) { - Uint hsstart, hslen; + Uint hsstart; + Uint hsend; Eterm *tp; Eterm type; Binary *bin; @@ -1300,42 +1362,10 @@ BIF_RETTYPE binary_match_3(BIF_ALIST_3) if (is_not_binary(BIF_ARG_1)) { goto badarg; } - if (BIF_ARG_3 == ((Eterm) 0)) { - /* Invalid term, we're called from binary_match_2... */ - hsstart = 0; - hslen = binary_size(BIF_ARG_1); - } else if (is_list(BIF_ARG_3)) { - Eterm l = BIF_ARG_3; - while(is_list(l)) { - Eterm t = CAR(list_val(l)); - if (!is_tuple(t)) { - goto badarg; - } - tp = tuple_val(t); - if (arityval(*tp) != 2) { - goto badarg; - } - if (!term_to_Uint(tp[1], &hsstart) || - ((hsstart >> 16) >> 15) != 0) { - goto badarg; - } - if (!term_to_Uint(tp[2], &hslen) || - ((hslen >> 16) >> 15) != 0) { - goto badarg; - } - if (hslen < hsstart) { - goto badarg; - } - if (hslen > binary_size(BIF_ARG_1)-1) { - goto badarg; /* XXX:PaN or should we take as much as we have ? 
*/ - } - hslen = hslen + 1 - hsstart; - l = CDR(list_val(l)); - } - } else if (BIF_ARG_3 != NIL) { + if (parse_match_opts_list(BIF_ARG_3,BIF_ARG_1,&hsstart,&hsend)) { goto badarg; } - if (hslen == 0) { + if (hsend == 0) { BIF_RET(am_nomatch); } if (is_tuple(BIF_ARG_2)) { @@ -1356,7 +1386,7 @@ BIF_RETTYPE binary_match_3(BIF_ALIST_3) } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { goto badarg; } - runres = do_binary_match(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin,NIL,&result); + runres = do_binary_match(BIF_P,BIF_ARG_1,hsstart,hsend,type,bin,NIL,&result); if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE); bin_term = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); @@ -1378,7 +1408,7 @@ BIF_RETTYPE binary_match_3(BIF_ALIST_3) BIF_RETTYPE binary_matches_3(BIF_ALIST_3) { - Uint hsstart, hslen; + Uint hsstart, hsend; Eterm *tp; Eterm type; Binary *bin; @@ -1389,43 +1419,10 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) if (is_not_binary(BIF_ARG_1)) { goto badarg; } - if (BIF_ARG_3 == ((Eterm) 0)) { - /* Invalid term, we're called from binary_matches_2... */ - hsstart = 0; - hslen = binary_size(BIF_ARG_1); - } else if (is_list(BIF_ARG_3)) { - Eterm l = BIF_ARG_3; - while(is_list(l)) { - Eterm t = CAR(list_val(l)); - if (!is_tuple(t)) { - goto badarg; - } - tp = tuple_val(t); - if (arityval(*tp) != 2) { - goto badarg; - } - if (!term_to_Uint(tp[1], &hsstart) || - ((hsstart >> 16) >> 15) != 0) { - goto badarg; - } - if (!term_to_Uint(tp[2], &hslen) || - ((hslen >> 16) >> 15) != 0) { - goto badarg; - } - if (hslen < hsstart) { - goto badarg; - } - if (hslen > binary_size(BIF_ARG_1)-1) { - goto badarg; /* XXX:PaN or should we take as much as we - have ? 
*/ - } - hslen = hslen + 1 - hsstart; - l = CDR(list_val(l)); - } - } else if (BIF_ARG_3 != NIL) { + if (parse_match_opts_list(BIF_ARG_3,BIF_ARG_1,&hsstart,&hsend)) { goto badarg; } - if (hslen == 0) { + if (hsend == 0) { BIF_RET(am_nomatch); } if (is_tuple(BIF_ARG_2)) { @@ -1446,7 +1443,7 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { goto badarg; } - runres = do_binary_matches(BIF_P,BIF_ARG_1,hsstart,hslen,type,bin, + runres = do_binary_matches(BIF_P,BIF_ARG_1,hsstart,hsend,type,bin, NIL,&result); if (runres == DO_BIN_MATCH_RESTART && bin_term == NIL) { Eterm *hp = HAlloc(BIF_P, PROC_BIN_SIZE); @@ -1480,6 +1477,79 @@ BIF_RETTYPE binary_matches_2(BIF_ALIST_2) return binary_matches_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); } +BIF_RETTYPE binary_part_3(BIF_ALIST_3) +{ + Uint pos; + Sint len; + size_t orig_size; + Eterm orig; + Uint offset; + Uint bit_offset; + Uint bit_size; + Eterm* hp; + ErlSubBin* sb; + + if (is_not_binary(BIF_ARG_1)) { + goto badarg; + } + if (!term_to_Uint(BIF_ARG_2, &pos)) { + goto badarg; + } + if (!term_to_Sint(BIF_ARG_3, &len)) { + goto badarg; + } + if (len < 0) { + Sint lentmp = -len; + if (-lentmp != len) { + goto badarg; + } + len = lentmp; + pos -= len; + } + if (((pos + len) - len) != pos) { + goto badarg; + } + if ((orig_size = binary_size(BIF_ARG_1)) < pos || + orig_size < (pos + len)) { + goto badarg; + } + + + + hp = HAlloc(BIF_P, ERL_SUB_BIN_SIZE); + + ERTS_GET_REAL_BIN(BIF_ARG_1, orig, offset, bit_offset, bit_size); + sb = (ErlSubBin *) hp; + sb->thing_word = HEADER_SUB_BIN; + sb->size = len; + sb->offs = offset + pos; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = 0; + sb->is_writable = 0; + + BIF_RET(make_binary(sb)); + + badarg: + BIF_ERROR(BIF_P, BADARG); +} + +BIF_RETTYPE binary_part_2(BIF_ALIST_2) +{ + Eterm *tp; + if (is_not_tuple(BIF_ARG_2)) { + goto badarg; + } + tp = tuple_val(BIF_ARG_2); + if (arityval(*tp) != 2) { + goto badarg; + } + return 
binary_part_3(BIF_P,BIF_ARG_1,tp[1], tp[2]); + badarg: + BIF_ERROR(BIF_P,BADARG); +} + + /* * Hard debug functions (dump) for the search structures */ -- cgit v1.2.3 From 9d2fe9d9af19ab94ff3feb1e7b9ffd83fa6927ff Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 23 Apr 2010 18:53:51 +0200 Subject: Add binary:longest_common_prefix/longest_common_suffix Add allocator parameter to erts_get_aligned_binary_bytes_extra. Add testcases for the functions above. Add reference implementation for the functions above. --- erts/emulator/beam/atom.names | 2 + erts/emulator/beam/bif.tab | 4 +- erts/emulator/beam/binary.c | 4 +- erts/emulator/beam/erl_alloc.types | 1 + erts/emulator/beam/erl_bif_binary.c | 317 ++++++++++++++++++++++++++++++++++++ erts/emulator/beam/erl_binary.h | 15 +- erts/emulator/beam/erl_nif.c | 4 +- 7 files changed, 337 insertions(+), 10 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 06ff0e11a4..386b040a27 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -101,6 +101,8 @@ atom band atom big atom bif_return_trap atom binary +atom binary_longest_prefix_trap +atom binary_longest_suffix_trap atom binary_match_trap atom binary_matches_trap atom block diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 85674664e4..536f9ac5c8 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -765,8 +765,8 @@ bif binary:match/2 bif binary:match/3 bif binary:matches/2 bif binary:matches/3 -# bif binary:longest_common_prefix/1 -# bif binary:longest_common_suffix/1 +bif binary:longest_common_prefix/1 +bif binary:longest_common_suffix/1 # bif binary:first/1 # bif binary:last/1 # bif binary:at/2 diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 7ca3eb686d..9c4076c8ff 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -180,7 +180,7 @@ erts_realloc_binary(Eterm bin, size_t size) } byte* 
-erts_get_aligned_binary_bytes_extra(Eterm bin, byte** base_ptr, unsigned extra) +erts_get_aligned_binary_bytes_extra(Eterm bin, byte** base_ptr, ErtsAlcType_t allocator, unsigned extra) { byte* bytes; Eterm* real_bin; @@ -208,7 +208,7 @@ erts_get_aligned_binary_bytes_extra(Eterm bin, byte** base_ptr, unsigned extra) bytes = (byte *)(&(((ErlHeapBin *) real_bin)->data)) + offs; } if (bit_offs) { - byte* buf = (byte *) erts_alloc(ERTS_ALC_T_TMP, byte_size + extra); + byte* buf = (byte *) erts_alloc(allocator, byte_size + extra); *base_ptr = buf; buf += extra; erts_copy_bits(bytes, bit_offs, 1, buf, 0, 1, byte_size*8); diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index 5d2872a4e3..6f88bbe5b8 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -232,6 +232,7 @@ type RE_SUBJECT SHORT_LIVED SYSTEM re_subject type RE_HEAP STANDARD SYSTEM re_heap type RE_STACK SHORT_LIVED SYSTEM re_stack type UNICODE_BUFFER SHORT_LIVED SYSTEM unicode_buffer +type BINARY_BUFFER SHORT_LIVED SYSTEM binary_buffer type PRE_ALLOC_DATA LONG_LIVED SYSTEM pre_alloc_data type DRV_THR_OPTS DRIVER SYSTEM driver_thread_opts type DRV_TID DRIVER SYSTEM driver_tid diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index a635280ac1..5bde065049 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -55,6 +55,10 @@ static Export binary_match_trap_export; static BIF_RETTYPE binary_match_trap(BIF_ALIST_3); static Export binary_matches_trap_export; static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3); +static Export binary_longest_prefix_trap_export; +static BIF_RETTYPE binary_longest_prefix_trap(BIF_ALIST_3); +static Export binary_longest_suffix_trap_export; +static BIF_RETTYPE binary_longest_suffix_trap(BIF_ALIST_3); static Uint max_loop_limit; @@ -76,6 +80,22 @@ void erts_init_bif_binary(void) binary_matches_trap_export.code[3] = (BeamInstr) em_apply_bif; 
binary_matches_trap_export.code[4] = (BeamInstr) &binary_matches_trap; + sys_memset((void *) &binary_longest_prefix_trap_export, 0, sizeof(Export)); + binary_longest_prefix_trap_export.address = &binary_longest_prefix_trap_export.code[3]; + binary_longest_prefix_trap_export.code[0] = am_erlang; + binary_longest_prefix_trap_export.code[1] = am_binary_longest_prefix_trap; + binary_longest_prefix_trap_export.code[2] = 3; + binary_longest_prefix_trap_export.code[3] = (BeamInstr) em_apply_bif; + binary_longest_prefix_trap_export.code[4] = (BeamInstr) &binary_longest_prefix_trap; + + sys_memset((void *) &binary_longest_suffix_trap_export, 0, sizeof(Export)); + binary_longest_suffix_trap_export.address = &binary_longest_suffix_trap_export.code[3]; + binary_longest_suffix_trap_export.code[0] = am_erlang; + binary_longest_suffix_trap_export.code[1] = am_binary_longest_suffix_trap; + binary_longest_suffix_trap_export.code[2] = 3; + binary_longest_suffix_trap_export.code[3] = (BeamInstr) em_apply_bif; + binary_longest_suffix_trap_export.code[4] = (BeamInstr) &binary_longest_suffix_trap; + max_loop_limit = 0; return; } @@ -1549,6 +1569,303 @@ BIF_RETTYPE binary_part_2(BIF_ALIST_2) BIF_ERROR(BIF_P,BADARG); } +typedef struct { + int type; /* CL_TYPE_XXX */ + byte *temp_alloc; /* Used for erts_get/free_aligned, i.e. 
CL_TYPE_ALIGNED */ + unsigned char *buff; /* Used for all types, malloced if CL_TYPE_HEAP */ + Uint bufflen; /* The length (in bytes) of buffer */ +} CommonData; + +#define COMMON_LOOP_FACTOR 10 + +#define DIRECTION_PREFIX 0 +#define DIRECTION_SUFFIX 1 + +#define CL_OK 0 +#define CL_RESTART 1 + +/* The type field in the above structure */ +#define CL_TYPE_EMPTY 0 /* End of array */ +#define CL_TYPE_HEAP 1 +#define CL_TYPE_ALIGNED 2 +#define CL_TYPE_COMMON 3 /* emacsulated */ +#define CL_TYPE_HEAP_NOALLOC 4 /* Will need allocating when trapping */ + + +static int do_search_forward(CommonData *cd, Uint *posp, Uint *redsp) +{ + Uint pos = *posp; + Sint reds = (Sint) *redsp; + int i; + unsigned char current = 0; + + for(;;) { + for(i = 0; cd[i].type != CL_TYPE_EMPTY; ++i) { + if (pos >= cd[i].bufflen) { + *posp = pos; + if (reds > 0) { + *redsp = (Uint) reds; + } else { + *redsp = 0; + } + return CL_OK; + } + if (i == 0) { + current = cd[i].buff[pos]; + } else { + if (cd[i].buff[pos] != current) { + *posp = pos; + if (reds > 0) { + *redsp = (Uint) reds; + } else { + *redsp = 0; + } + return CL_OK; + } + } + --reds; + } + ++pos; + if (reds <= 0) { + *posp = pos; + *redsp = 0; + return CL_RESTART; + } + } +} +static int do_search_backward(CommonData *cd, Uint *posp, Uint *redsp) +{ + Uint pos = *posp; + Sint reds = (Sint) *redsp; + int i; + unsigned char current = 0; + + for(;;) { + for(i = 0; cd[i].type != CL_TYPE_EMPTY; ++i) { + if (pos >= cd[i].bufflen) { + *posp = pos; + if (reds > 0) { + *redsp = (Uint) reds; + } else { + *redsp = 0; + } + return CL_OK; + } + if (i == 0) { + current = cd[i].buff[cd[i].bufflen - 1 - pos]; + } else { + if (cd[i].buff[cd[i].bufflen - 1 - pos] != current) { + *posp = pos; + if (reds > 0) { + *redsp = (Uint) reds; + } else { + *redsp = 0; + } + return CL_OK; + } + } + --reds; + } + ++pos; + if (reds <= 0) { + *posp = pos; + *redsp = 0; + return CL_RESTART; + } + } +} + +static void cleanup_common_data(Binary *bp) +{ + int i; + CommonData 
*cd; + cd = (CommonData *) ERTS_MAGIC_BIN_DATA(bp); + for (i=0;cd[i].type != CL_TYPE_EMPTY;++i) { + switch (cd[i].type) { + case CL_TYPE_HEAP: + erts_free(ERTS_ALC_T_BINARY_BUFFER,cd[i].buff); + break; + case CL_TYPE_ALIGNED: + erts_free_aligned_binary_bytes_extra(cd[i].temp_alloc, ERTS_ALC_T_BINARY_BUFFER); + break; + default: + break; + } + } + return; +} + +static BIF_RETTYPE do_longest_common(Process *p, Eterm list, int direction) +{ + Eterm l = list; + int n = 0; + Binary *mb; + CommonData *cd; + int i = 0; + Uint reds = get_reds(p, COMMON_LOOP_FACTOR); + Uint save_reds = reds; + int res; + Export *trapper; + Uint pos; + Eterm epos; + Eterm *hp; + Eterm bin_term; + Eterm b; + + /* First just count the number of binaries */ + while (is_list(l)) { + b = CAR(list_val(l)); + if (!is_binary(b)) { + goto badarg; + } + ++n; + l = CDR(list_val(l)); + } + if (l != NIL || n == 0) { + goto badarg; + } + + /* OK, now create a buffer of the right size, we can do a magic binary right away, + that's not too costly. */ + mb = erts_create_magic_binary((n+1)*sizeof(CommonData),cleanup_common_data); + cd = (CommonData *) ERTS_MAGIC_BIN_DATA(mb); + l = list; + while (is_list(l)) { + Uint bitoffs; + Uint bitsize; + Uint offset; + Eterm real_bin; + ProcBin* pb; + + cd[i].type = CL_TYPE_EMPTY; + b = CAR(list_val(l)); + ERTS_GET_REAL_BIN(b, real_bin, offset, bitoffs, bitsize); + if (bitsize != 0) { + erts_bin_free(mb); + goto badarg; + } + cd[i].bufflen = binary_size(b); + cd[i].temp_alloc = NULL; + if (*(binary_val(real_bin)) == HEADER_PROC_BIN) { + pb = (ProcBin *) binary_val(real_bin); + if (pb->flags) { + erts_emasculate_writable_binary(pb); + } + cd[i].buff = erts_get_aligned_binary_bytes_extra(b, &(cd[i].temp_alloc), + ERTS_ALC_T_BINARY_BUFFER,0); + cd[i].type = (cd[i].temp_alloc != NULL) ? 
CL_TYPE_ALIGNED : CL_TYPE_COMMON; + } else { /* Heap binary */ + cd[i].buff = erts_get_aligned_binary_bytes_extra(b, &(cd[i].temp_alloc), + ERTS_ALC_T_BINARY_BUFFER,0); + /* CL_TYPE_HEAP_NOALLOC means you have to copy if trapping */ + cd[i].type = (cd[i].temp_alloc != NULL) ? CL_TYPE_ALIGNED : CL_TYPE_HEAP_NOALLOC; + } + ++i; + l = CDR(list_val(l)); + } + cd[i].type = CL_TYPE_EMPTY; +#if defined(DEBUG) || defined(VALGRIND) + cd[i].temp_alloc = NULL; + cd[i].buff = NULL; + cd[i].bufflen = 0; +#endif + + pos = 0; + if (direction == DIRECTION_PREFIX) { + trapper = &binary_longest_prefix_trap_export; + res = do_search_forward(cd,&pos,&reds); + } else { + ASSERT(direction == DIRECTION_SUFFIX); + trapper = &binary_longest_suffix_trap_export; + res = do_search_backward(cd,&pos,&reds); + } + epos = erts_make_integer(pos,p); + if (res == CL_OK) { + erts_bin_free(mb); + BUMP_REDS(p, (save_reds - reds) / COMMON_LOOP_FACTOR); + BIF_RET(epos); + } else { + ASSERT(res == CL_RESTART); + /* Copy all heap binaries that are not already copied (aligned) */ + for(i = 0; i < n; ++i) { + if (cd[i].type == CL_TYPE_HEAP_NOALLOC) { + unsigned char *tmp = cd[i].buff; + cd[i].buff = erts_alloc(ERTS_ALC_T_BINARY_BUFFER, cd[i].bufflen); + memcpy(cd[i].buff,tmp,cd[i].bufflen); + } + } + hp = HAlloc(p, PROC_BIN_SIZE); + bin_term = erts_mk_magic_binary_term(&hp, &MSO(p), mb); + BUMP_ALL_REDS(p); + BIF_TRAP3(trapper, p, bin_term, epos,list); + } + badarg: + BIF_ERROR(p,BADARG); +} + +static BIF_RETTYPE do_longest_common_trap(Process *p, Eterm bin_term, Eterm current_pos, + Eterm orig_list, int direction) +{ + Uint reds = get_reds(p, COMMON_LOOP_FACTOR); + Uint save_reds = reds; + Uint pos; + Binary *bin; + CommonData *cd; + int res; + Eterm epos; + Export *trapper; + +#ifdef DEBUG + int r; + r = term_to_Uint(current_pos, &pos); + ASSERT(r != 0); +#else + term_to_Uint(current_pos, &pos); +#endif + ASSERT(ERTS_TERM_IS_MAGIC_BINARY(bin_term)); + bin = ((ProcBin *) binary_val(bin_term))->val; + cd = 
(CommonData *) ERTS_MAGIC_BIN_DATA(bin); + if (direction == DIRECTION_PREFIX) { + trapper = &binary_longest_prefix_trap_export; + res = do_search_forward(cd,&pos,&reds); + } else { + ASSERT(direction == DIRECTION_SUFFIX); + trapper = &binary_longest_suffix_trap_export; + res = do_search_backward(cd,&pos,&reds); + } + epos = erts_make_integer(pos,p); + if (res == CL_OK) { + BUMP_REDS(p, (save_reds - reds) / COMMON_LOOP_FACTOR); + BIF_RET(epos); + } else { + ASSERT(res == CL_RESTART); + /* Copy all heap binaries that are not already copied (aligned) */ + BUMP_ALL_REDS(p); + BIF_TRAP3(&binary_longest_prefix_trap_export, p, bin_term, epos, orig_list); + } +} + +static BIF_RETTYPE binary_longest_prefix_trap(BIF_ALIST_3) +{ + return do_longest_common_trap(BIF_P,BIF_ARG_1,BIF_ARG_2,BIF_ARG_3,DIRECTION_PREFIX); +} + +static BIF_RETTYPE binary_longest_suffix_trap(BIF_ALIST_3) +{ + return do_longest_common_trap(BIF_P,BIF_ARG_1,BIF_ARG_2,BIF_ARG_3,DIRECTION_SUFFIX); +} + +BIF_RETTYPE binary_longest_common_prefix_1(BIF_ALIST_1) +{ + return do_longest_common(BIF_P,BIF_ARG_1,DIRECTION_PREFIX); +} + +BIF_RETTYPE binary_longest_common_suffix_1(BIF_ALIST_1) +{ + return do_longest_common(BIF_P,BIF_ARG_1,DIRECTION_SUFFIX); +} + + /* * Hard debug functions (dump) for the search structures diff --git a/erts/emulator/beam/erl_binary.h b/erts/emulator/beam/erl_binary.h index 5b0b3bcec2..74d7966ca0 100644 --- a/erts/emulator/beam/erl_binary.h +++ b/erts/emulator/beam/erl_binary.h @@ -150,7 +150,7 @@ do { \ void erts_init_binary(void); -byte* erts_get_aligned_binary_bytes_extra(Eterm, byte**, unsigned extra); +byte* erts_get_aligned_binary_bytes_extra(Eterm, byte**, ErtsAlcType_t, unsigned extra); #if defined(__i386__) || !defined(__GNUC__) /* @@ -168,6 +168,7 @@ byte* erts_get_aligned_binary_bytes_extra(Eterm, byte**, unsigned extra); ERTS_GLB_INLINE byte* erts_get_aligned_binary_bytes(Eterm bin, byte** base_ptr); ERTS_GLB_INLINE void erts_free_aligned_binary_bytes(byte* buf); 
+ERTS_GLB_INLINE void erts_free_aligned_binary_bytes_extra(byte* buf, ErtsAlcType_t); ERTS_GLB_INLINE Binary *erts_bin_drv_alloc_fnf(Uint size); ERTS_GLB_INLINE Binary *erts_bin_drv_alloc(Uint size); ERTS_GLB_INLINE Binary *erts_bin_nrml_alloc(Uint size); @@ -184,17 +185,23 @@ ERTS_GLB_INLINE Binary *erts_create_magic_binary(Uint size, ERTS_GLB_INLINE byte* erts_get_aligned_binary_bytes(Eterm bin, byte** base_ptr) { - return erts_get_aligned_binary_bytes_extra(bin, base_ptr, 0); + return erts_get_aligned_binary_bytes_extra(bin, base_ptr, ERTS_ALC_T_TMP, 0); } ERTS_GLB_INLINE void -erts_free_aligned_binary_bytes(byte* buf) +erts_free_aligned_binary_bytes_extra(byte* buf, ErtsAlcType_t allocator) { if (buf) { - erts_free(ERTS_ALC_T_TMP, (void *) buf); + erts_free(allocator, (void *) buf); } } +ERTS_GLB_INLINE void +erts_free_aligned_binary_bytes(byte* buf) +{ + erts_free_aligned_binary_bytes_extra(buf,ERTS_ALC_T_TMP); +} + /* Explicit extra bytes allocated to counter buggy drivers. ** These extra bytes where earlier (< R13B04) added by an alignment-bug ** in this code. Do we dare remove this in some major release (R14?) maybe? 
diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index 2790020117..cee4df72a2 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -250,7 +250,7 @@ int enif_is_ref(ErlNifEnv* env, ERL_NIF_TERM term) static void aligned_binary_dtor(struct enif_tmp_obj_t* obj) { - erts_free_aligned_binary_bytes((byte*)obj); + erts_free_aligned_binary_bytes_extra((byte*)obj,ERTS_ALC_T_TMP); } int enif_inspect_binary(ErlNifEnv* env, Eterm bin_term, ErlNifBinary* bin) @@ -260,7 +260,7 @@ int enif_inspect_binary(ErlNifEnv* env, Eterm bin_term, ErlNifBinary* bin) byte* raw_ptr; }u; u.tmp = NULL; - bin->data = erts_get_aligned_binary_bytes_extra(bin_term, &u.raw_ptr, + bin->data = erts_get_aligned_binary_bytes_extra(bin_term, &u.raw_ptr, ERTS_ALC_T_TMP, sizeof(struct enif_tmp_obj_t)); if (bin->data == NULL) { return 0; -- cgit v1.2.3 From 02af6209ed16b8588b76e8c1972000eee12ba225 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 28 Apr 2010 20:26:46 +0200 Subject: Add bin_to_list/{1,2,3} Add testcases for bin_to_list. Teach binref.erl bin_to_list. 
--- erts/emulator/beam/atom.names | 1 + erts/emulator/beam/bif.tab | 12 +- erts/emulator/beam/erl_bif_binary.c | 296 +++++++++++++++++++++++++++++++++++- 3 files changed, 302 insertions(+), 7 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 386b040a27..0117e14816 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -101,6 +101,7 @@ atom band atom big atom bif_return_trap atom binary +atom binary_bin_to_list_trap atom binary_longest_prefix_trap atom binary_longest_suffix_trap atom binary_match_trap diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 536f9ac5c8..4d79856e43 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -767,14 +767,14 @@ bif binary:matches/2 bif binary:matches/3 bif binary:longest_common_prefix/1 bif binary:longest_common_suffix/1 -# bif binary:first/1 -# bif binary:last/1 -# bif binary:at/2 +bif binary:first/1 +bif binary:last/1 +bif binary:at/2 bif binary:part/2 bif binary:part/3 -# bif binary:bin_to_list/1 -# bif binary:bin_to_list/2 -# bif binary:bin_to_list/3 +bif binary:bin_to_list/1 +bif binary:bin_to_list/2 +bif binary:bin_to_list/3 # bif binary:list_to_bin/1 # bif binary:copy/1 # bif binary:copy/2 diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index 5bde065049..0a9454f6bc 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -59,6 +59,8 @@ static Export binary_longest_prefix_trap_export; static BIF_RETTYPE binary_longest_prefix_trap(BIF_ALIST_3); static Export binary_longest_suffix_trap_export; static BIF_RETTYPE binary_longest_suffix_trap(BIF_ALIST_3); +static Export binary_bin_to_list_trap_export; +static BIF_RETTYPE binary_bin_to_list_trap(BIF_ALIST_3); static Uint max_loop_limit; @@ -96,6 +98,14 @@ void erts_init_bif_binary(void) binary_longest_suffix_trap_export.code[3] = (BeamInstr) em_apply_bif; 
binary_longest_suffix_trap_export.code[4] = (BeamInstr) &binary_longest_suffix_trap; + sys_memset((void *) &binary_bin_to_list_trap_export, 0, sizeof(Export)); + binary_bin_to_list_trap_export.address = &binary_bin_to_list_trap_export.code[3]; + binary_bin_to_list_trap_export.code[0] = am_erlang; + binary_bin_to_list_trap_export.code[1] = am_binary_bin_to_list_trap; + binary_bin_to_list_trap_export.code[2] = 3; + binary_bin_to_list_trap_export.code[3] = (BeamInstr) em_apply_bif; + binary_bin_to_list_trap_export.code[4] = (BeamInstr) &binary_bin_to_list_trap; + max_loop_limit = 0; return; } @@ -228,9 +238,12 @@ static void dump_ac_node(ACNode *node, int indent, int ch); #ifndef MAX -#define MAX(A,B) (((A) > (B)) ? (A) : B) +#define MAX(A,B) (((A) > (B)) ? (A) : (B)) #endif +#ifndef MIN +#define MIN(A,B) (((A) > (B)) ? (B) : (A)) +#endif /* * Callback for the magic binary */ @@ -1524,6 +1537,9 @@ BIF_RETTYPE binary_part_3(BIF_ALIST_3) goto badarg; } len = lentmp; + if (len > pos) { + goto badarg; + } pos -= len; } if (((pos + len) - len) != pos) { @@ -1865,7 +1881,285 @@ BIF_RETTYPE binary_longest_common_suffix_1(BIF_ALIST_1) return do_longest_common(BIF_P,BIF_ARG_1,DIRECTION_SUFFIX); } +BIF_RETTYPE binary_first_1(BIF_ALIST_1) +{ + byte* bytes; + Uint byte_size; + Uint bit_offs; + Uint bit_size; + Uint res; + + if (is_not_binary(BIF_ARG_1)) { + goto badarg; + } + byte_size = binary_size(BIF_ARG_1); + if (!byte_size) { + goto badarg; + } + ERTS_GET_BINARY_BYTES(BIF_ARG_1,bytes,bit_offs,bit_size); + if (bit_size) { + goto badarg; + } + if (bit_offs) { + res = ((((Uint) bytes[0]) << bit_offs) | (((Uint) bytes[1]) >> (8-bit_offs))) & 0xFF; + } else { + res = bytes[0]; + } + BIF_RET(make_small(res)); + badarg: + BIF_ERROR(BIF_P,BADARG); +} + +BIF_RETTYPE binary_last_1(BIF_ALIST_1) +{ + byte* bytes; + Uint byte_size; + Uint bit_offs; + Uint bit_size; + Uint res; + + if (is_not_binary(BIF_ARG_1)) { + goto badarg; + } + byte_size = binary_size(BIF_ARG_1); + if (!byte_size) { 
+ goto badarg; + } + ERTS_GET_BINARY_BYTES(BIF_ARG_1,bytes,bit_offs,bit_size); + if (bit_size) { + goto badarg; + } + if (bit_offs) { + res = ((((Uint) bytes[byte_size-1]) << bit_offs) | + (((Uint) bytes[byte_size]) >> (8-bit_offs))) & 0xFF; + } else { + res = bytes[byte_size-1]; + } + BIF_RET(make_small(res)); + badarg: + BIF_ERROR(BIF_P,BADARG); +} + +BIF_RETTYPE binary_at_2(BIF_ALIST_2) +{ + byte* bytes; + Uint byte_size; + Uint bit_offs; + Uint bit_size; + Uint res; + Uint index; + + if (is_not_binary(BIF_ARG_1)) { + goto badarg; + } + byte_size = binary_size(BIF_ARG_1); + if (!byte_size) { + goto badarg; + } + if (!term_to_Uint(BIF_ARG_2, &index)) { + goto badarg; + } + if (index >= byte_size) { + goto badarg; + } + ERTS_GET_BINARY_BYTES(BIF_ARG_1,bytes,bit_offs,bit_size); + if (bit_size) { + goto badarg; + } + if (bit_offs) { + res = ((((Uint) bytes[index]) << bit_offs) | + (((Uint) bytes[index+1]) >> (8-bit_offs))) & 0xFF; + } else { + res = bytes[index]; + } + BIF_RET(make_small(res)); + badarg: + BIF_ERROR(BIF_P,BADARG); +} + +#define BIN_TO_LIST_OK 0 +#define BIN_TO_LIST_TRAP 1 +/* No badarg, checked before call */ + +#define BIN_TO_LIST_LOOP_FACTOR 10 + +static int do_bin_to_list(Process *p, byte *bytes, Uint bit_offs, + Uint start, Sint *lenp, Eterm *termp) +{ + Uint reds = get_reds(p, BIN_TO_LIST_LOOP_FACTOR); + Uint len = *lenp; + Uint loops; + Eterm *hp; + Eterm term = *termp; + Uint n; + + if (reds == 0) { + return BIN_TO_LIST_TRAP; + } + loops = MIN(reds,len); + + BUMP_REDS(p, loops / BIN_TO_LIST_LOOP_FACTOR); + + hp = HAlloc(p,2*loops); + while (loops--) { + --len; + if (bit_offs) { + n = ((((Uint) bytes[start+len]) << bit_offs) | + (((Uint) bytes[start+len+1]) >> (8-bit_offs))) & 0xFF; + } else { + n = bytes[start+len]; + } + + term = CONS(hp,make_small(n),term); + hp +=2; + } + *termp = term; + *lenp = len; + if (len) { + BUMP_ALL_REDS(p); + return BIN_TO_LIST_TRAP; + } + return BIN_TO_LIST_OK; +} + + +static BIF_RETTYPE 
do_trap_bin_to_list(Process *p, Eterm binary, + Uint start, Sint len, Eterm sofar) +{ + Eterm *hp; + Eterm blob; + + hp = HAlloc(p,3); + hp[0] = make_pos_bignum_header(2); + hp[1] = start; + hp[2] = (Uint) len; + blob = make_big(hp); + BIF_TRAP3(&binary_bin_to_list_trap_export, p, binary, blob, sofar); +} + +static BIF_RETTYPE binary_bin_to_list_trap(BIF_ALIST_3) +{ + Eterm *ptr; + Uint start; + Sint len; + byte *bytes; + Uint bit_offs; + Uint bit_size; + Eterm res = BIF_ARG_3; + + ptr = big_val(BIF_ARG_2); + start = ptr[1]; + len = (Sint) ptr[2]; + + ERTS_GET_BINARY_BYTES(BIF_ARG_1,bytes,bit_offs,bit_size); + if(do_bin_to_list(BIF_P, bytes, bit_offs, start, &len, &res) == + BIN_TO_LIST_OK) { + BIF_RET(res); + } + return do_trap_bin_to_list(BIF_P,BIF_ARG_1,start,len,res); +} + +static BIF_RETTYPE binary_bin_to_list_common(Process *p, + Eterm bin, + Eterm epos, + Eterm elen) +{ + Uint pos; + Sint len; + size_t sz; + byte *bytes; + Uint bit_offs; + Uint bit_size; + Eterm res = NIL; + + if (is_not_binary(bin)) { + goto badarg; + } + if (!term_to_Uint(epos, &pos)) { + goto badarg; + } + if (!term_to_Sint(elen, &len)) { + goto badarg; + } + if (len < 0) { + Sint lentmp = -len; + if (-lentmp != len) { + goto badarg; + } + len = lentmp; + if (len > pos) { + goto badarg; + } + pos -= len; + } + if (((pos + len) - len) != pos) { + goto badarg; + } + sz = binary_size(bin); + + if (pos+len > sz) { + goto badarg; + } + ERTS_GET_BINARY_BYTES(bin,bytes,bit_offs,bit_size); + if (bit_size != 0) { + goto badarg; + } + if(do_bin_to_list(p, bytes, bit_offs, pos, &len, &res) == + BIN_TO_LIST_OK) { + BIF_RET(res); + } + return do_trap_bin_to_list(p,bin,pos,len,res); + + badarg: + BIF_ERROR(p,BADARG); +} + +BIF_RETTYPE binary_bin_to_list_3(BIF_ALIST_3) +{ + return binary_bin_to_list_common(BIF_P,BIF_ARG_1,BIF_ARG_2,BIF_ARG_3); +} + +BIF_RETTYPE binary_bin_to_list_2(BIF_ALIST_2) +{ + Eterm *tp; + + if (is_not_tuple(BIF_ARG_2)) { + goto badarg; + } + tp = tuple_val(BIF_ARG_2); + if 
(arityval(*tp) != 2) { + goto badarg; + } + return binary_bin_to_list_common(BIF_P,BIF_ARG_1,tp[1],tp[2]); + badarg: + BIF_ERROR(BIF_P,BADARG); +} + +BIF_RETTYPE binary_bin_to_list_1(BIF_ALIST_1) +{ + Uint pos = 0; + Sint len; + byte *bytes; + Uint bit_offs; + Uint bit_size; + Eterm res = NIL; + if (is_not_binary(BIF_ARG_1)) { + goto badarg; + } + len = binary_size(BIF_ARG_1); + ERTS_GET_BINARY_BYTES(BIF_ARG_1,bytes,bit_offs,bit_size); + if (bit_size != 0) { + goto badarg; + } + if(do_bin_to_list(BIF_P, bytes, bit_offs, pos, &len, &res) == + BIN_TO_LIST_OK) { + BIF_RET(res); + } + return do_trap_bin_to_list(BIF_P,BIF_ARG_1,pos,len,res); + badarg: + BIF_ERROR(BIF_P,BADARG); +} /* * Hard debug functions (dump) for the search structures -- cgit v1.2.3 From 1dad48ee9f2e1aba6a0ec69d9cf688705d6f187c Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 29 Apr 2010 20:06:55 +0200 Subject: Add binary:list_to_bin/1 and binary:copy/1,2 Add testcases for binary:list_to_bin/1 and binary:copy/1,2. Add reference implementation of list_to_bin/1. 
--- erts/emulator/beam/atom.names | 1 + erts/emulator/beam/bif.tab | 6 +- erts/emulator/beam/binary.c | 52 +++----- erts/emulator/beam/erl_bif_binary.c | 257 ++++++++++++++++++++++++++++++++++++ erts/emulator/beam/erl_binary.h | 7 + 5 files changed, 289 insertions(+), 34 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 0117e14816..1138c0c871 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -102,6 +102,7 @@ atom big atom bif_return_trap atom binary atom binary_bin_to_list_trap +atom binary_copy_trap atom binary_longest_prefix_trap atom binary_longest_suffix_trap atom binary_match_trap diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 4d79856e43..bd908566ee 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -775,9 +775,9 @@ bif binary:part/3 bif binary:bin_to_list/1 bif binary:bin_to_list/2 bif binary:bin_to_list/3 -# bif binary:list_to_bin/1 -# bif binary:copy/1 -# bif binary:copy/2 +bif binary:list_to_bin/1 +bif binary:copy/1 +bif binary:copy/2 # bif binary:referenced_byte_size/1 # bif binary:decode_unsigned/1 # bif binary:decode_unsigned/2 diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 9c4076c8ff..c68392fad4 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -346,29 +346,40 @@ BIF_RETTYPE bitstring_to_list_1(BIF_ALIST_1) /* Turn a possibly deep list of ints (and binaries) into */ /* One large binary object */ -BIF_RETTYPE list_to_binary_1(BIF_ALIST_1) +/* + * This bif also exists in the binary module, under the name + * binary:list_to_bin/1, why it's divided into interface and + * implementation. Also the backend for iolist_to_binary_1. 
+ */ + +BIF_RETTYPE erts_list_to_binary_bif(Process *p, Eterm arg) { Eterm bin; int i; int offset; byte* bytes; - if (is_nil(BIF_ARG_1)) { - BIF_RET(new_binary(BIF_P,(byte*)"",0)); + if (is_nil(arg)) { + BIF_RET(new_binary(p,(byte*)"",0)); } - if (is_not_list(BIF_ARG_1)) { + if (is_not_list(arg)) { goto error; } - if ((i = io_list_len(BIF_ARG_1)) < 0) { + if ((i = io_list_len(arg)) < 0) { goto error; } - bin = new_binary(BIF_P, (byte *)NULL, i); + bin = new_binary(p, (byte *)NULL, i); bytes = binary_bytes(bin); - offset = io_list_to_buf(BIF_ARG_1, (char*) bytes, i); + offset = io_list_to_buf(arg, (char*) bytes, i); ASSERT(offset == 0); BIF_RET(bin); - error: - BIF_ERROR(BIF_P, BADARG); + error: + BIF_ERROR(p, BADARG); +} + +BIF_RETTYPE list_to_binary_1(BIF_ALIST_1) +{ + return erts_list_to_binary_bif(BIF_P, BIF_ARG_1); } /* Turn a possibly deep list of ints (and binaries) into */ @@ -376,31 +387,10 @@ BIF_RETTYPE list_to_binary_1(BIF_ALIST_1) BIF_RETTYPE iolist_to_binary_1(BIF_ALIST_1) { - Eterm bin; - int i; - int offset; - byte* bytes; - if (is_binary(BIF_ARG_1)) { BIF_RET(BIF_ARG_1); } - if (is_nil(BIF_ARG_1)) { - BIF_RET(new_binary(BIF_P,(byte*)"",0)); - } - if (is_not_list(BIF_ARG_1)) { - goto error; - } - if ((i = io_list_len(BIF_ARG_1)) < 0) { - goto error; - } - bin = new_binary(BIF_P, (byte *)NULL, i); - bytes = binary_bytes(bin); - offset = io_list_to_buf(BIF_ARG_1, (char*) bytes, i); - ASSERT(offset == 0); - BIF_RET(bin); - - error: - BIF_ERROR(BIF_P, BADARG); + return erts_list_to_binary_bif(BIF_P, BIF_ARG_1); } BIF_RETTYPE list_to_bitstring_1(BIF_ALIST_1) diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index 0a9454f6bc..e2d3d00db2 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -61,6 +61,8 @@ static Export binary_longest_suffix_trap_export; static BIF_RETTYPE binary_longest_suffix_trap(BIF_ALIST_3); static Export binary_bin_to_list_trap_export; static BIF_RETTYPE 
binary_bin_to_list_trap(BIF_ALIST_3); +static Export binary_copy_trap_export; +static BIF_RETTYPE binary_copy_trap(BIF_ALIST_2); static Uint max_loop_limit; @@ -105,6 +107,13 @@ void erts_init_bif_binary(void) binary_bin_to_list_trap_export.code[2] = 3; binary_bin_to_list_trap_export.code[3] = (BeamInstr) em_apply_bif; binary_bin_to_list_trap_export.code[4] = (BeamInstr) &binary_bin_to_list_trap; + sys_memset((void *) &binary_copy_trap_export, 0, sizeof(Export)); + binary_copy_trap_export.address = &binary_copy_trap_export.code[3]; + binary_copy_trap_export.code[0] = am_erlang; + binary_copy_trap_export.code[1] = am_binary_copy_trap; + binary_copy_trap_export.code[2] = 2; + binary_copy_trap_export.code[3] = (BeamInstr) em_apply_bif; + binary_copy_trap_export.code[4] = (BeamInstr) &binary_copy_trap; max_loop_limit = 0; return; @@ -121,6 +130,7 @@ Sint erts_binary_set_loop_limit(Sint limit) } else { max_loop_limit = (Uint) limit; } + return save; } @@ -131,6 +141,9 @@ static Uint get_reds(Process *p, int loop_factor) if (tmp != 0 && tmp < reds) { return tmp; } + if (!reds) { + reds = 1; + } return reds; } @@ -2161,6 +2174,250 @@ BIF_RETTYPE binary_bin_to_list_1(BIF_ALIST_1) BIF_ERROR(BIF_P,BADARG); } +/* + * Ok, erlang:list_to_binary does not interrupt, and we really don't want + * an alternative implementation for the exact same thing, which is why we + * have decided to use the old non-restarting implementation for now. + * In reality, there are seldom many iterations involved in doing this, so the + * problem of long-running BIFs is not really that big in this case. + * So, for now we use the old implementation also in the module binary.
+ */ + +BIF_RETTYPE binary_list_to_bin_1(BIF_ALIST_1) +{ + return erts_list_to_binary_bif(BIF_P, BIF_ARG_1); +} + +typedef struct { + Uint times_left; + Uint source_size; + int source_type; + byte *source; + byte *temp_alloc; + Uint result_pos; + Binary *result; +} CopyBinState; + +#define BC_TYPE_EMPTY 0 +#define BC_TYPE_HEAP 1 +#define BC_TYPE_ALIGNED 2 /* May or may not point to (emasculated) binary, temp_alloc field is set + so that erts_free_aligned_binary_bytes_extra can handle either */ + + +#define BINARY_COPY_LOOP_FACTOR 100 + +static void cleanup_copy_bin_state(Binary *bp) +{ + CopyBinState *cbs = (CopyBinState *) ERTS_MAGIC_BIN_DATA(bp); + if (cbs->result != NULL) { + erts_bin_free(cbs->result); + cbs->result = NULL; + } + switch (cbs->source_type) { + case BC_TYPE_HEAP: + erts_free(ERTS_ALC_T_BINARY_BUFFER,cbs->source); + break; + case BC_TYPE_ALIGNED: + erts_free_aligned_binary_bytes_extra(cbs->temp_alloc, + ERTS_ALC_T_BINARY_BUFFER); + break; + default: + /* otherwise do nothing */ + break; + } + cbs->source_type = BC_TYPE_EMPTY; +} + +/* + * Binary *erts_bin_nrml_alloc(Uint size); + * Binary *erts_bin_realloc(Binary *bp, Uint size); + * void erts_bin_free(Binary *bp); + */ +static BIF_RETTYPE do_binary_copy(Process *p, Eterm bin, Eterm en) +{ + Uint n; + byte *bytes; + Uint bit_offs; + Uint bit_size; + size_t size; + Uint reds = get_reds(p, BINARY_COPY_LOOP_FACTOR); + Uint target_size; + byte *t; + Uint pos; + + + if (is_not_binary(bin)) { + goto badarg; + } + if (!term_to_Uint(en, &n)) { + goto badarg; + } + if (!n) { + goto badarg; + } + ERTS_GET_BINARY_BYTES(bin,bytes,bit_offs,bit_size); + if (bit_size != 0) { + goto badarg; + } + + size = binary_size(bin); + target_size = size * n; + + if ((target_size - size) >= reds) { + Eterm orig; + Uint offset; + Uint bit_offset; + Uint bit_size; + CopyBinState *cbs; + Eterm *hp; + Eterm trap_term; + int i; + + /* We will trap, set up the structure for trapping right away */ + Binary *mb = 
erts_create_magic_binary(sizeof(CopyBinState), + cleanup_copy_bin_state); + cbs = ERTS_MAGIC_BIN_DATA(mb); + + cbs->temp_alloc = NULL; + cbs->source = NULL; + + ERTS_GET_REAL_BIN(bin, orig, offset, bit_offset, bit_size); + if (*(binary_val(orig)) == HEADER_PROC_BIN) { + ProcBin* pb = (ProcBin *) binary_val(orig); + if (pb->flags) { + erts_emasculate_writable_binary(pb); + } + cbs->source = + erts_get_aligned_binary_bytes_extra(bin, + &(cbs->temp_alloc), + ERTS_ALC_T_BINARY_BUFFER, + 0); + cbs->source_type = BC_TYPE_ALIGNED; + } else { /* Heap binary */ + cbs->source = + erts_get_aligned_binary_bytes_extra(bin, + &(cbs->temp_alloc), + ERTS_ALC_T_BINARY_BUFFER, + 0); + if (!(cbs->temp_alloc)) { /* alignment not needed, need to copy */ + byte *tmp = erts_alloc(ERTS_ALC_T_BINARY_BUFFER,size); + memcpy(tmp,cbs->source,size); + cbs->source = tmp; + cbs->source_type = BC_TYPE_HEAP; + } else { + cbs->source_type = BC_TYPE_ALIGNED; + } + } + cbs->result = erts_bin_nrml_alloc(target_size); /* Always offheap + if trapping */ + cbs->result->flags = 0; + cbs->result->orig_size = target_size; + erts_refc_init(&(cbs->result->refc), 1); + t = (byte *) cbs->result->orig_bytes; /* No offset or anything */ + pos = 0; + i = 0; + while (pos < reds) { + memcpy(t+pos,cbs->source, size); + pos += size; + ++i; + } + cbs->source_size = size; + cbs->result_pos = pos; + cbs->times_left = n-i; + hp = HAlloc(p,PROC_BIN_SIZE); + trap_term = erts_mk_magic_binary_term(&hp, &MSO(p), mb); + BUMP_ALL_REDS(p); + BIF_TRAP2(&binary_copy_trap_export, p, bin, trap_term); + } else { + Eterm res_term; + byte *temp_alloc = NULL; + byte *source = + erts_get_aligned_binary_bytes(bin, + &temp_alloc); + if (target_size <= ERL_ONHEAP_BIN_LIMIT) { + res_term = erts_new_heap_binary(p,NULL,target_size,&t); + } else { + res_term = erts_new_mso_binary(p,NULL,target_size); + t = ((ProcBin *) binary_val(res_term))->bytes; + } + pos = 0; + while (pos < target_size) { + memcpy(t+pos,source, size); + pos += size; + } + 
erts_free_aligned_binary_bytes(temp_alloc); + BUMP_REDS(p,pos / BINARY_COPY_LOOP_FACTOR); + BIF_RET(res_term); + } + badarg: + BIF_ERROR(p,BADARG); +} + +BIF_RETTYPE binary_copy_trap(BIF_ALIST_2) +{ + Uint n; + size_t size; + Uint reds = get_reds(BIF_P, BINARY_COPY_LOOP_FACTOR); + byte *t; + Uint pos; + Binary *mb = ((ProcBin *) binary_val(BIF_ARG_2))->val; + CopyBinState *cbs = (CopyBinState *) ERTS_MAGIC_BIN_DATA(mb); + Uint opos; + + /* swapout... */ + n = cbs->times_left; + size = cbs->source_size; + opos = pos = cbs->result_pos; + t = (byte *) cbs->result->orig_bytes; /* "well behaved" binary */ + if ((n-1) * size >= reds) { + Uint i = 0; + while ((pos - opos) < reds) { + memcpy(t+pos,cbs->source, size); + pos += size; + ++i; + } + cbs->result_pos = pos; + cbs->times_left -= i; + BUMP_ALL_REDS(BIF_P); + BIF_TRAP2(&binary_copy_trap_export, BIF_P, BIF_ARG_1, BIF_ARG_2); + } else { + Binary *save; + ProcBin* pb; + Uint target_size = cbs->result->orig_size; + while (pos < target_size) { + memcpy(t+pos,cbs->source, size); + pos += size; + } + save = cbs->result; + cbs->result = NULL; + cleanup_copy_bin_state(mb); /* now cbs is dead */ + pb = (ProcBin *) HAlloc(BIF_P, PROC_BIN_SIZE); + pb->thing_word = HEADER_PROC_BIN; + pb->size = target_size; + pb->next = MSO(BIF_P).mso; + MSO(BIF_P).mso = pb; + pb->val = save; + pb->bytes = t; + pb->flags = 0; + + MSO(BIF_P).overhead += target_size / sizeof(Eterm); + BUMP_REDS(BIF_P,(pos - opos) / BINARY_COPY_LOOP_FACTOR); + + BIF_RET(make_binary(pb)); + } +} + + +BIF_RETTYPE binary_copy_1(BIF_ALIST_1) +{ + return do_binary_copy(BIF_P,BIF_ARG_1,make_small(1)); +} + +BIF_RETTYPE binary_copy_2(BIF_ALIST_2) +{ + return do_binary_copy(BIF_P,BIF_ARG_1,BIF_ARG_2); +} + /* * Hard debug functions (dump) for the search structures */ diff --git a/erts/emulator/beam/erl_binary.h b/erts/emulator/beam/erl_binary.h index 74d7966ca0..aeeebf3c74 100644 --- a/erts/emulator/beam/erl_binary.h +++ b/erts/emulator/beam/erl_binary.h @@ -21,6 +21,7 @@ 
#define __ERL_BINARY_H #include "erl_threads.h" +#include "bif.h" /* * Maximum number of bytes to place in a heap binary. @@ -152,6 +153,12 @@ void erts_init_binary(void); byte* erts_get_aligned_binary_bytes_extra(Eterm, byte**, ErtsAlcType_t, unsigned extra); +/* + * Common implementation for erlang:list_to_binary/1 and binary:list_to_bin/1 + */ + +BIF_RETTYPE erts_list_to_binary_bif(Process *p, Eterm arg); + #if defined(__i386__) || !defined(__GNUC__) /* * Doubles aren't required to be 8-byte aligned on intel x86. -- cgit v1.2.3 From 8f21d4026e8f338245812d6edfd113c05282e321 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 30 Apr 2010 11:52:38 +0200 Subject: Add referenced_byte_size/1 Add testcases for referenced_byte_size/1. Add failure tests for referenced_byte_size. --- erts/emulator/beam/bif.tab | 2 +- erts/emulator/beam/erl_bif_binary.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'erts') diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index bd908566ee..7978044fe5 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -778,7 +778,7 @@ bif binary:bin_to_list/3 bif binary:list_to_bin/1 bif binary:copy/1 bif binary:copy/2 -# bif binary:referenced_byte_size/1 +bif binary:referenced_byte_size/1 # bif binary:decode_unsigned/1 # bif binary:decode_unsigned/2 diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index e2d3d00db2..b3ebc95cea 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -2418,6 +2418,29 @@ BIF_RETTYPE binary_copy_2(BIF_ALIST_2) return do_binary_copy(BIF_P,BIF_ARG_1,BIF_ARG_2); } +BIF_RETTYPE binary_referenced_byte_size_1(BIF_ALIST_1) +{ + ErlSubBin *sb; + ProcBin *pb; + Eterm res; + Eterm bin = BIF_ARG_1; + + if (is_not_binary(BIF_ARG_1)) { + BIF_ERROR(BIF_P,BADARG); + } + sb = (ErlSubBin *) binary_val(bin); + if (sb->thing_word == HEADER_SUB_BIN) { + bin = sb->orig; + } + pb = 
(ProcBin *) binary_val(bin); + if (pb->thing_word == HEADER_PROC_BIN) { + res = erts_make_integer((Uint) pb->val->orig_size, BIF_P); /* XXX:PaN Halfword? orig_size is a long */ + } else { /* heap binary */ + res = erts_make_integer((Uint) ((ErlHeapBin *) pb)->size, BIF_P); + } + BIF_RET(res); +} + /* * Hard debug functions (dump) for the search structures */ -- cgit v1.2.3 From ccf8728a95af72aa4847e0844d45dcda991ea435 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 30 Apr 2010 18:17:17 +0200 Subject: Add binary:{encode,decode}_unsigned({1,2} Add testcases for encode/decode_unsigned/1,2. --- erts/emulator/beam/bif.tab | 6 +- erts/emulator/beam/erl_bif_binary.c | 304 ++++++++++++++++++++++++++++++++++++ 2 files changed, 308 insertions(+), 2 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 7978044fe5..157fbb7bb6 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -779,8 +779,10 @@ bif binary:list_to_bin/1 bif binary:copy/1 bif binary:copy/2 bif binary:referenced_byte_size/1 -# bif binary:decode_unsigned/1 -# bif binary:decode_unsigned/2 +bif binary:encode_unsigned/1 +bif binary:encode_unsigned/2 +bif binary:decode_unsigned/1 +bif binary:decode_unsigned/2 # # New Bifs in R13B4 diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index b3ebc95cea..d073eb072f 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -2441,6 +2441,310 @@ BIF_RETTYPE binary_referenced_byte_size_1(BIF_ALIST_1) BIF_RET(res); } +#define END_BIG 0 +#define END_SMALL 1 + +#ifdef WORDS_BIGENDIAN +#define END_NATIVE END_BIG +#else +#define END_NATIVE END_SMALL +#endif + +static int get_need(Uint u) { +#if defined(ARCH_64) && !HALFWORD_HEAP + if (u > 0xFFFFFFFFUL) { + if (u > 0xFFFFFFFFFFFFUL) { + if (u > 0xFFFFFFFFFFFFFFUL) { + return 8; + } + return 7; + } + if (u > 0xFFFFFFFFFFUL) { + return 6; + } + return 5; + } +#endif + if (u > 
0xFFFFUL) { + if (u > 0xFFFFFFUL) { + return 4; + } + return 3; + } + if (u > 0xFFUL) { + return 2; + } + return 1; +} + +static BIF_RETTYPE do_encode_unsigned(Process *p, Eterm uns, Eterm endianess) +{ + Eterm res; + if ((is_not_small(uns) && is_not_big(uns)) || is_not_atom(endianess) || + (endianess != am_big && endianess != am_little)) { + goto badarg; + } + if (is_small(uns)) { + Sint x = signed_val(uns); + Uint u; + int n,i; + byte *b; + + if (x < 0) { + goto badarg; + } + + u = (Uint) x; + n = get_need(u); + ASSERT(n <= ERL_ONHEAP_BIN_LIMIT); + res = erts_new_heap_binary(p, NULL, n, &b); + if (endianess == am_big) { + for(i=n-1;i>=0;--i) { + b[i] = u & 0xFF; + u >>= 8; + } + } else { + for(i=0;i<n;++i) { + b[i] = u & 0xFF; + u >>= 8; + } + } + BIF_RET(res); + } else { + /* Big */ + Eterm *bigp = big_val(uns); + Uint n; + dsize_t num_parts = BIG_SIZE(bigp); + Eterm res; + byte *b; + ErtsDigit d; + + if(BIG_SIGN(bigp)) { + goto badarg; + } + n = (num_parts-1)*sizeof(ErtsDigit)+get_need(BIG_DIGIT(bigp,(num_parts-1))); + if (n <= ERL_ONHEAP_BIN_LIMIT) { + res = erts_new_heap_binary(p,NULL,n,&b); + } else { + res = erts_new_mso_binary(p,NULL,n); + b = ((ProcBin *) binary_val(res))->bytes; + } + + if (endianess == am_big) { + Sint i,j; + j = 0; + d = BIG_DIGIT(bigp,0); + for (i=n-1;i>=0;--i) { + b[i] = d & 0xFF; + if (!((++j) % sizeof(ErtsDigit))) { + d = BIG_DIGIT(bigp,j / sizeof(ErtsDigit)); + } else { + d >>= 8; + } + } + } else { + Sint i,j; + j = 0; + d = BIG_DIGIT(bigp,0); + for (i=0;i<n;++i) { + b[i] = d & 0xFF; + if (!((++j) % sizeof(ErtsDigit))) { + d = BIG_DIGIT(bigp,j / sizeof(ErtsDigit)); + } else { + d >>= 8; + } + } + + } + BIF_RET(res); + } + badarg: + BIF_ERROR(p,BADARG); +} + +static BIF_RETTYPE do_decode_unsigned(Process *p, Eterm uns, Eterm endianess) +{ + byte *bytes; + Uint bitoffs, bitsize; + Uint size; + Eterm res; + + if (is_not_binary(uns) || is_not_atom(endianess) || + (endianess != am_big && endianess != am_little)) { + goto badarg; + } + ERTS_GET_BINARY_BYTES(uns, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + /* align while rolling */ + size = binary_size(uns); + if
(bitoffs) { + if (endianess == am_big) { + while (size && (((((Uint) bytes[0]) << bitoffs) | + (((Uint) bytes[1]) >> (8-bitoffs))) & 0xFF) == 0) { + ++bytes; + --size; + } + } else { + while(size && + (((((Uint) bytes[size-1]) << bitoffs) | + (((Uint) bytes[size]) >> (8-bitoffs))) & 0xFF) == 0) { + --size; + } + } + } else { + if (endianess == am_big) { + while (size && *bytes == 0) { + ++bytes; + --size; + } + } else { + while(size && bytes[size-1] == 0) { + --size; + } + } + } + if (!size) { + BIF_RET(make_small(0)); + } + + if (size <= sizeof(Uint)) { + Uint u = 0; + Sint i; + + if (endianess == am_big) { + if (bitoffs) { + for(i=0;i<size;++i) { + u <<=8; + u |= (((((Uint) bytes[i]) << bitoffs) | + (((Uint) bytes[i+1]) >> (8-bitoffs))) & 0xFF); + } + } else { + for(i=0;i<size;++i) { + u <<=8; + u |= bytes[i]; + } + } + } else { + if (bitoffs) { + for(i=size-1;i>=0;--i) { + u <<=8; + u |= (((((Uint) bytes[i]) << bitoffs) | + (((Uint) bytes[i+1]) >> (8-bitoffs))) & 0xFF); + } + } else { + for(i=size-1;i>=0;--i) { + u <<=8; + u |= bytes[i]; + } + } + } + res = erts_make_integer(u,p); + BIF_RET(res); + } else { + /* Assume big, as we stripped away all zeroes from the MSB part of the binary */ + dsize_t num_parts = size / sizeof(ErtsDigit) + !!(size % sizeof(ErtsDigit)); + Eterm *bigp; + + bigp = HAlloc(p, BIG_NEED_SIZE(num_parts)); + *bigp = make_pos_bignum_header(num_parts); + res = make_big(bigp); + + if (endianess == am_big) { + Sint i,j; + ErtsDigit *d; + j = size; + d = &(BIG_DIGIT(bigp,num_parts - 1)); + *d = 0; + i = 0; + if(bitoffs) { + for (;;){ + (*d) <<= 8; + (*d) |= (((((Uint) bytes[i]) << bitoffs) | + (((Uint) bytes[i+1]) >> (8-bitoffs))) & 0xFF); + if (++i >= size) { + break; + } + if (!(--j % sizeof(ErtsDigit))) { + --d; + *d = 0; + } + } + } else { + for (;;){ + (*d) <<= 8; + (*d) |= bytes[i]; + if (++i >= size) { + break; + } + if (!(--j % sizeof(ErtsDigit))) { + --d; + *d = 0; + } + } + } + } else { + Sint i,j; + ErtsDigit *d; + j = size; + d = &(BIG_DIGIT(bigp,num_parts - 1)); + *d = 0; + i = size-1; + if (bitoffs) { + for (;;){ + (*d) <<= 8; + (*d) |= (((((Uint) bytes[i]) << bitoffs) | + (((Uint) bytes[i+1]) >> (8-bitoffs)))
& 0xFF); + if (--i < 0) { + break; + } + if (!(--j % sizeof(ErtsDigit))) { + --d; + *d = 0; + } + } + } else { + for (;;){ + (*d) <<= 8; + (*d) |= bytes[i]; + if (--i < 0) { + break; + } + if (!(--j % sizeof(ErtsDigit))) { + --d; + *d = 0; + } + } + } + } + BIF_RET(res); + } + badarg: + BIF_ERROR(p,BADARG); +} + +BIF_RETTYPE binary_encode_unsigned_1(BIF_ALIST_1) +{ + return do_encode_unsigned(BIF_P,BIF_ARG_1,am_big); +} + +BIF_RETTYPE binary_encode_unsigned_2(BIF_ALIST_2) +{ + return do_encode_unsigned(BIF_P,BIF_ARG_1,BIF_ARG_2); +} + +BIF_RETTYPE binary_decode_unsigned_1(BIF_ALIST_1) +{ + return do_decode_unsigned(BIF_P,BIF_ARG_1,am_big); +} + +BIF_RETTYPE binary_decode_unsigned_2(BIF_ALIST_2) +{ + return do_decode_unsigned(BIF_P,BIF_ARG_1,BIF_ARG_2); +} + /* * Hard debug functions (dump) for the search structures */ -- cgit v1.2.3 From 0a3305354685b311cfa85b29214b411b24aafcff Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Tue, 4 May 2010 14:24:50 +0200 Subject: Add guard BIFs binary_part/2,3 Add the gc_bif's to the VM. Add infrastructure for gc_bif's (guard bifs that can gc) with two and three arguments in VM (loader and VM). Add compiler support for gc_bif with three arguments. Add compiler (and interpreter) support for new guard BIFs. Add testcases for new guard BIFs in compiler and emulator.
--- erts/emulator/beam/beam_emu.c | 79 ++++++++++++++++++ erts/emulator/beam/beam_load.c | 86 +++++++++++++++++-- erts/emulator/beam/bif.tab | 20 +++-- erts/emulator/beam/erl_bif_binary.c | 125 +++++++++++++++++++++++++--- erts/emulator/beam/erl_bif_guard.c | 37 +++++++++ erts/emulator/beam/erl_binary.h | 3 + erts/emulator/beam/erl_vm.h | 3 +- erts/emulator/beam/global.h | 3 + erts/emulator/beam/ops.tab | 33 ++++++-- erts/emulator/test/Makefile | 3 +- erts/emulator/test/guard_SUITE.erl | 160 +++++++++++++++++++++++++++++++++++- 11 files changed, 517 insertions(+), 35 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index ee8ba54cb8..f0b04535dd 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -2026,6 +2026,81 @@ void process_main(void) goto post_error_handling; } + OpCase(i_gc_bif2_jIId): /* Note, one less parameter than the i_gc_bif1 + and i_gc_bif3 */ + { + typedef Eterm (*GcBifFunction)(Process*, Eterm*, Uint); + GcBifFunction bf; + Eterm result; + Uint live = (Uint) Arg(2); + + reg[0] = r(0); + reg[live++] = tmp_arg1; + reg[live] = tmp_arg2; + bf = (GcBifFunction) Arg(1); + c_p->fcalls = FCALLS; + SWAPOUT; + PROCESS_MAIN_CHK_LOCKS(c_p); + ERTS_SMP_UNREQ_PROC_MAIN_LOCK(c_p); + result = (*bf)(c_p, reg, live); + ERTS_SMP_REQ_PROC_MAIN_LOCK(c_p); + PROCESS_MAIN_CHK_LOCKS(c_p); + SWAPIN; + r(0) = reg[0]; + ERTS_HOLE_CHECK(c_p); + FCALLS = c_p->fcalls; + if (is_value(result)) { + StoreBifResult(3, result); + } + if (Arg(0) != 0) { + SET_I((BeamInstr *) Arg(0)); + Goto(*I); + } + reg[0] = tmp_arg1; + reg[1] = tmp_arg2; + I = handle_error(c_p, I, reg, translate_gc_bif((void *) bf)); + goto post_error_handling; + } + + OpCase(i_gc_bif3_jIsId): + { + typedef Eterm (*GcBifFunction)(Process*, Eterm*, Uint); + GcBifFunction bf; + Eterm arg; + Eterm result; + Uint live = (Uint) Arg(3); + + GetArg1(2, arg); + reg[0] = r(0); + reg[live++] = arg; + reg[live++] = tmp_arg1; + reg[live] = 
tmp_arg2; + bf = (GcBifFunction) Arg(1); + c_p->fcalls = FCALLS; + SWAPOUT; + PROCESS_MAIN_CHK_LOCKS(c_p); + ERTS_SMP_UNREQ_PROC_MAIN_LOCK(c_p); + result = (*bf)(c_p, reg, live); + ERTS_SMP_REQ_PROC_MAIN_LOCK(c_p); + PROCESS_MAIN_CHK_LOCKS(c_p); + SWAPIN; + r(0) = reg[0]; + ERTS_HOLE_CHECK(c_p); + FCALLS = c_p->fcalls; + if (is_value(result)) { + StoreBifResult(4, result); + } + if (Arg(0) != 0) { + SET_I((BeamInstr *) Arg(0)); + Goto(*I); + } + reg[0] = arg; + reg[1] = tmp_arg1; + reg[2] = tmp_arg2; + I = handle_error(c_p, I, reg, translate_gc_bif((void *) bf)); + goto post_error_handling; + } + /* * Guards bifs and, or, xor in guards. */ @@ -4986,6 +5061,10 @@ translate_gc_bif(void* gcf) return round_1; } else if (gcf == erts_gc_trunc_1) { return round_1; + } else if (gcf == erts_gc_binary_part_2) { + return binary_part_2; + } else if (gcf == erts_gc_binary_part_3) { + return binary_part_3; } else { erl_exit(1, "bad gc bif"); } diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 5e4375fc96..597f604e22 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -486,9 +486,6 @@ static GenOp* const_select_val(LoaderState* stp, GenOpArg S, GenOpArg Fail, GenOpArg Size, GenOpArg* Rest); static GenOp* gen_func_info(LoaderState* stp, GenOpArg mod, GenOpArg Func, GenOpArg arity, GenOpArg label); -static GenOp* -gen_guard_bif(LoaderState* stp, GenOpArg Fail, GenOpArg Live, GenOpArg Bif, - GenOpArg Src, GenOpArg Dst); static int freeze_code(LoaderState* stp); @@ -3358,9 +3355,15 @@ gen_make_fun2(LoaderState* stp, GenOpArg idx) op->next = NULL; return op; } - +/* + * Rewrite gc_bifs with one parameter (the common case). Utilized + * in ops.tab to rewrite instructions calling bif's in guards + * to use a garbage collecting implementation. The instructions + * are sometimes once again rewritten to handle literals (putting the + * parameter in the mostly unused r[0] before the instruction is executed). 
+ */ static GenOp* -gen_guard_bif(LoaderState* stp, GenOpArg Fail, GenOpArg Live, GenOpArg Bif, +gen_guard_bif1(LoaderState* stp, GenOpArg Fail, GenOpArg Live, GenOpArg Bif, GenOpArg Src, GenOpArg Dst) { GenOp* op; @@ -3372,6 +3375,8 @@ gen_guard_bif(LoaderState* stp, GenOpArg Fail, GenOpArg Live, GenOpArg Bif, op->a[0] = Fail; op->a[1].type = TAG_u; bf = stp->import[Bif.val].bf; + /* The translations here need to have a reverse counterpart in + beam_emu.c:translate_gc_bif for error handling to work properly. */ if (bf == length_1) { op->a[1].val = (BeamInstr) (void *) erts_gc_length_1; } else if (bf == size_1) { @@ -3398,6 +3403,77 @@ gen_guard_bif(LoaderState* stp, GenOpArg Fail, GenOpArg Live, GenOpArg Bif, return op; } +/* + * This is used by the ops.tab rule that rewrites gc_bifs with two parameters. + * The instruction returned is then again rewritten to an i_fetch instruction + * followed by i_gc_bif2_jIId, to handle literals properly. + * As opposed to the i_gc_bif1_jIsId, the instruction i_gc_bif2_jIId is + * always rewritten, regardless of if there actually are any literals. + */ +static GenOp* +gen_guard_bif2(LoaderState* stp, GenOpArg Fail, GenOpArg Live, GenOpArg Bif, + GenOpArg S1, GenOpArg S2, GenOpArg Dst) +{ + GenOp* op; + BifFunction bf; + + NEW_GENOP(stp, op); + op->op = genop_ii_gc_bif2_6; + op->arity = 6; + op->a[0] = Fail; + op->a[1].type = TAG_u; + bf = stp->import[Bif.val].bf; + /* The translations here need to have a reverse counterpart in + beam_emu.c:translate_gc_bif for error handling to work properly.
*/ + if (bf == binary_part_2) { + op->a[1].val = (BeamInstr) (void *) erts_gc_binary_part_2; + } else { + abort(); + } + op->a[2] = S1; + op->a[3] = S2; + op->a[4] = Live; + op->a[5] = Dst; + op->next = NULL; + return op; +} + +/* + * This is used by the ops.tab rule that rewrites gc_bifs with three parameters. + * The instruction returned is then again rewritten to a move instruction that + * uses r[0] for temp storage, followed by an i_fetch instruction, + * followed by i_gc_bif3_jIsId, to handle literals properly. Rewriting + * always occurs, as with the gc_bif2 counterpart. + */ +static GenOp* +gen_guard_bif3(LoaderState* stp, GenOpArg Fail, GenOpArg Live, GenOpArg Bif, + GenOpArg S1, GenOpArg S2, GenOpArg S3, GenOpArg Dst) +{ + GenOp* op; + BifFunction bf; + + NEW_GENOP(stp, op); + op->op = genop_ii_gc_bif3_7; + op->arity = 7; + op->a[0] = Fail; + op->a[1].type = TAG_u; + bf = stp->import[Bif.val].bf; + /* The translations here need to have a reverse counterpart in + beam_emu.c:translate_gc_bif for error handling to work properly.
*/ + if (bf == binary_part_3) { + op->a[1].val = (BeamInstr) (void *) erts_gc_binary_part_3; + } else { + abort(); + } + op->a[2] = S1; + op->a[3] = S2; + op->a[4] = S3; + op->a[5] = Live; + op->a[6] = Dst; + op->next = NULL; + return op; +} + /* * Freeze the code in memory, move the string table into place, diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 157fbb7bb6..9feb302a3d 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -754,12 +754,21 @@ bif erlang:load_nif/2 bif erlang:call_on_load_function/1 bif erlang:finish_after_on_load/2 -# The binary match bifs (New in R13B04 - EEP9) +# +# New Bifs in R13B4 +# +bif erlang:binary_to_term/2 + +# +# The binary match bifs (New in R14A - EEP9) # # # The searching/splitting/substituting thingies # +ubif erlang:binary_part/2 +ubif erlang:binary_part/3 + bif binary:compile_pattern/1 bif binary:match/2 bif binary:match/3 @@ -770,8 +779,8 @@ bif binary:longest_common_suffix/1 bif binary:first/1 bif binary:last/1 bif binary:at/2 -bif binary:part/2 -bif binary:part/3 +bif binary:part/2 binary_binary_part_2 +bif binary:part/3 binary_binary_part_3 bif binary:bin_to_list/1 bif binary:bin_to_list/2 bif binary:bin_to_list/3 @@ -784,11 +793,6 @@ bif binary:encode_unsigned/2 bif binary:decode_unsigned/1 bif binary:decode_unsigned/2 -# -# New Bifs in R13B4 -# -bif erlang:binary_to_term/2 - # # Obsolete # diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index d073eb072f..ea8ea2e53d 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -1523,7 +1523,8 @@ BIF_RETTYPE binary_matches_2(BIF_ALIST_2) return binary_matches_3(BIF_P,BIF_ARG_1,BIF_ARG_2,((Eterm) 0)); } -BIF_RETTYPE binary_part_3(BIF_ALIST_3) + +BIF_RETTYPE erts_binary_part(Process *p, Eterm binary, Eterm epos, Eterm elen) { Uint pos; Sint len; @@ -1535,13 +1536,13 @@ BIF_RETTYPE binary_part_3(BIF_ALIST_3) Eterm* hp; ErlSubBin* sb; - if 
(is_not_binary(BIF_ARG_1)) { + if (is_not_binary(binary)) { goto badarg; } - if (!term_to_Uint(BIF_ARG_2, &pos)) { + if (!term_to_Uint(epos, &pos)) { goto badarg; } - if (!term_to_Sint(BIF_ARG_3, &len)) { + if (!term_to_Sint(elen, &len)) { goto badarg; } if (len < 0) { @@ -1558,16 +1559,16 @@ BIF_RETTYPE binary_part_3(BIF_ALIST_3) if (((pos + len) - len) != pos) { goto badarg; } - if ((orig_size = binary_size(BIF_ARG_1)) < pos || + if ((orig_size = binary_size(binary)) < pos || orig_size < (pos + len)) { goto badarg; } - hp = HAlloc(BIF_P, ERL_SUB_BIN_SIZE); + hp = HAlloc(p, ERL_SUB_BIN_SIZE); - ERTS_GET_REAL_BIN(BIF_ARG_1, orig, offset, bit_offset, bit_size); + ERTS_GET_REAL_BIN(binary, orig, offset, bit_offset, bit_size); sb = (ErlSubBin *) hp; sb->thing_word = HEADER_SUB_BIN; sb->size = len; @@ -1580,10 +1581,114 @@ BIF_RETTYPE binary_part_3(BIF_ALIST_3) BIF_RET(make_binary(sb)); badarg: - BIF_ERROR(BIF_P, BADARG); + BIF_ERROR(p, BADARG); +} + +#define ERTS_NEED_GC(p, need) ((HEAP_LIMIT((p)) - HEAP_TOP((p))) <= (need)) + +BIF_RETTYPE erts_gc_binary_part(Process *p, Eterm *reg, Eterm live, int range_is_tuple) +{ + Uint pos; + Sint len; + size_t orig_size; + Eterm orig; + Uint offset; + Uint bit_offset; + Uint bit_size; + Eterm* hp; + ErlSubBin* sb; + Eterm binary; + Eterm *tp; + Eterm epos, elen; + int extra_args; + + + if (range_is_tuple) { + Eterm tpl = reg[live]; + extra_args = 1; + if (is_not_tuple(tpl)) { + goto badarg; + } + tp = tuple_val(tpl); + if (arityval(*tp) != 2) { + goto badarg; + } + + epos = tp[1]; + elen = tp[2]; + } else { + extra_args = 2; + epos = reg[live-1]; + elen = reg[live]; + } + binary = reg[live-extra_args]; + + if (is_not_binary(binary)) { + goto badarg; + } + if (!term_to_Uint(epos, &pos)) { + goto badarg; + } + if (!term_to_Sint(elen, &len)) { + goto badarg; + } + if (len < 0) { + Sint lentmp = -len; + if (-lentmp != len) { + goto badarg; + } + len = lentmp; + if (len > pos) { + goto badarg; + } + pos -= len; + } + if (((pos + len) 
- len) != pos) { + goto badarg; + } + if ((orig_size = binary_size(binary)) < pos || + orig_size < (pos + len)) { + goto badarg; + } + + if (ERTS_NEED_GC(p, ERL_SUB_BIN_SIZE)) { + erts_garbage_collect(p, ERL_SUB_BIN_SIZE, reg, live+1-extra_args); /* I don't need the tuple + or indices any more */ + binary = reg[live-extra_args]; + } + + hp = p->htop; + p->htop += ERL_SUB_BIN_SIZE; + + ERTS_GET_REAL_BIN(binary, orig, offset, bit_offset, bit_size); + + sb = (ErlSubBin *) hp; + sb->thing_word = HEADER_SUB_BIN; + sb->size = len; + sb->offs = offset + pos; + sb->orig = orig; + sb->bitoffs = bit_offset; + sb->bitsize = 0; + sb->is_writable = 0; + + BIF_RET(make_binary(sb)); + + badarg: + BIF_ERROR(p, BADARG); +} +/************************************************************* + * The actual guard BIFs are in erl_bif_guard.c + * but the implementation of both the non-gc and the gc + * variants are here. Note that the functions are named so that they do + * not clash with the guard bif's erlang:binary_part/2,3 + *************************************************************/ + +BIF_RETTYPE binary_binary_part_3(BIF_ALIST_3) +{ + return erts_binary_part(BIF_P,BIF_ARG_1,BIF_ARG_2, BIF_ARG_3); } -BIF_RETTYPE binary_part_2(BIF_ALIST_2) +BIF_RETTYPE binary_binary_part_2(BIF_ALIST_2) { Eterm *tp; if (is_not_tuple(BIF_ARG_2)) { @@ -1593,7 +1698,7 @@ BIF_RETTYPE binary_part_2(BIF_ALIST_2) if (arityval(*tp) != 2) { goto badarg; } - return binary_part_3(BIF_P,BIF_ARG_1,tp[1], tp[2]); + return erts_binary_part(BIF_P,BIF_ARG_1,tp[1], tp[2]); badarg: BIF_ERROR(BIF_P,BADARG); } diff --git a/erts/emulator/beam/erl_bif_guard.c b/erts/emulator/beam/erl_bif_guard.c index 440b0b4f14..01e6977a2c 100644 --- a/erts/emulator/beam/erl_bif_guard.c +++ b/erts/emulator/beam/erl_bif_guard.c @@ -314,6 +314,30 @@ double_to_integer(Process* p, double x) return res; } +/******************************************************************************** + * binary_part guards. 
The actual implementation is in erl_bif_binary.c + ********************************************************************************/ +BIF_RETTYPE binary_part_3(BIF_ALIST_3) +{ + return erts_binary_part(BIF_P,BIF_ARG_1,BIF_ARG_2, BIF_ARG_3); +} + +BIF_RETTYPE binary_part_2(BIF_ALIST_2) +{ + Eterm *tp; + if (is_not_tuple(BIF_ARG_2)) { + goto badarg; + } + tp = tuple_val(BIF_ARG_2); + if (arityval(*tp) != 2) { + goto badarg; + } + return erts_binary_part(BIF_P,BIF_ARG_1,tp[1], tp[2]); + badarg: + BIF_ERROR(BIF_P,BADARG); +} + + /* * The following code is used when a guard that may build on the * heap is called directly. They must not use HAlloc(), but must @@ -630,3 +654,16 @@ gc_double_to_integer(Process* p, double x, Eterm* reg, Uint live) } return res; } + +/******************************************************************************** + * binary_part guards. The actual implementation is in erl_bif_binary.c + ********************************************************************************/ +Eterm erts_gc_binary_part_3(Process* p, Eterm* reg, Uint live) +{ + return erts_gc_binary_part(p,reg,live,0); +} + +Eterm erts_gc_binary_part_2(Process* p, Eterm* reg, Uint live) +{ + return erts_gc_binary_part(p,reg,live,1); +} diff --git a/erts/emulator/beam/erl_binary.h b/erts/emulator/beam/erl_binary.h index aeeebf3c74..a569fe2e85 100644 --- a/erts/emulator/beam/erl_binary.h +++ b/erts/emulator/beam/erl_binary.h @@ -158,6 +158,9 @@ byte* erts_get_aligned_binary_bytes_extra(Eterm, byte**, ErtsAlcType_t, unsigned */ BIF_RETTYPE erts_list_to_binary_bif(Process *p, Eterm arg); +BIF_RETTYPE erts_gc_binary_part(Process *p, Eterm *reg, Eterm live, int range_is_tuple); +BIF_RETTYPE erts_binary_part(Process *p, Eterm binary, Eterm epos, Eterm elen); + #if defined(__i386__) || !defined(__GNUC__) /* diff --git a/erts/emulator/beam/erl_vm.h b/erts/emulator/beam/erl_vm.h index 235bd7931d..eeeeb7ccfd 100644 --- a/erts/emulator/beam/erl_vm.h +++ b/erts/emulator/beam/erl_vm.h @@ -61,8 
+61,9 @@ /* * The new arithmetic operations need some extra X registers in the register array. + * so does the gc_bif's (i_gc_bif3 need 3 extra). */ -#define ERTS_X_REGS_ALLOCATED (MAX_REG+2) +#define ERTS_X_REGS_ALLOCATED (MAX_REG+3) #define INPUT_REDUCTIONS (2 * CONTEXT_REDS) diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 4745aaf9f5..a7990e1799 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1579,6 +1579,7 @@ Sint erts_re_set_loop_limit(Sint limit); /* erl_bif_binary.c */ void erts_init_bif_binary(void); Sint erts_binary_set_loop_limit(Sint limit); + /* erl_unicode.c */ void erts_init_unicode(void); Sint erts_unicode_set_loop_limit(Sint limit); @@ -1708,6 +1709,8 @@ Eterm erts_gc_abs_1(Process* p, Eterm* reg, Uint live); Eterm erts_gc_float_1(Process* p, Eterm* reg, Uint live); Eterm erts_gc_round_1(Process* p, Eterm* reg, Uint live); Eterm erts_gc_trunc_1(Process* p, Eterm* reg, Uint live); +Eterm erts_gc_binary_part_3(Process* p, Eterm* reg, Uint live); +Eterm erts_gc_binary_part_2(Process* p, Eterm* reg, Uint live); Uint erts_current_reductions(Process* current, Process *p); diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 49280a60e0..d6feef3fb9 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1390,33 +1390,50 @@ bif1 Fail u$bif:erlang:trunc/1 s d => too_old_compiler # Guard BIFs. 
# gc_bif1 Fail I Bif=u$bif:erlang:length/1 Src Dst=d => \ - gen_guard_bif(Fail, I, Bif, Src, Dst) + gen_guard_bif1(Fail, I, Bif, Src, Dst) gc_bif1 Fail I Bif=u$bif:erlang:size/1 Src Dst=d => \ - gen_guard_bif(Fail, I, Bif, Src, Dst) + gen_guard_bif1(Fail, I, Bif, Src, Dst) gc_bif1 Fail I Bif=u$bif:erlang:bit_size/1 Src Dst=d => \ - gen_guard_bif(Fail, I, Bif, Src, Dst) + gen_guard_bif1(Fail, I, Bif, Src, Dst) gc_bif1 Fail I Bif=u$bif:erlang:byte_size/1 Src Dst=d => \ - gen_guard_bif(Fail, I, Bif, Src, Dst) + gen_guard_bif1(Fail, I, Bif, Src, Dst) gc_bif1 Fail I Bif=u$bif:erlang:abs/1 Src Dst=d => \ - gen_guard_bif(Fail, I, Bif, Src, Dst) + gen_guard_bif1(Fail, I, Bif, Src, Dst) gc_bif1 Fail I Bif=u$bif:erlang:float/1 Src Dst=d => \ - gen_guard_bif(Fail, I, Bif, Src, Dst) + gen_guard_bif1(Fail, I, Bif, Src, Dst) gc_bif1 Fail I Bif=u$bif:erlang:round/1 Src Dst=d => \ - gen_guard_bif(Fail, I, Bif, Src, Dst) + gen_guard_bif1(Fail, I, Bif, Src, Dst) gc_bif1 Fail I Bif=u$bif:erlang:trunc/1 Src Dst=d => \ - gen_guard_bif(Fail, I, Bif, Src, Dst) + gen_guard_bif1(Fail, I, Bif, Src, Dst) + +gc_bif2 Fail I Bif=u$bif:erlang:binary_part/2 S1 S2 Dst=d => \ + gen_guard_bif2(Fail, I, Bif, S1, S2, Dst) + +gc_bif3 Fail I Bif=u$bif:erlang:binary_part/3 S1 S2 S3 Dst=d => \ + gen_guard_bif3(Fail, I, Bif, S1, S2, S3, Dst) i_gc_bif1 Fail Bif V=q Live D => move V x | i_gc_bif1 Fail Bif x Live D i_gc_bif1 j I s I d +ii_gc_bif2/6 + +ii_gc_bif2 Fail Bif S1 S2 Live D => i_fetch S1 S2 | i_gc_bif2 Fail Bif Live D + +i_gc_bif2 j I I d + +ii_gc_bif3/7 + +ii_gc_bif3 Fail Bif S1 S2 S3 Live D => move S1 x | i_fetch S2 S3 | i_gc_bif3 Fail Bif x Live D + +i_gc_bif3 j I s I d # # R13B03 # diff --git a/erts/emulator/test/Makefile b/erts/emulator/test/Makefile index 2424fedbed..97b94c466f 100644 --- a/erts/emulator/test/Makefile +++ b/erts/emulator/test/Makefile @@ -118,7 +118,8 @@ NO_OPT= bs_bincomp \ bs_match_int \ bs_match_tail \ bs_match_misc \ - bs_utf + bs_utf \ + guard NO_OPT_MODULES= 
$(NO_OPT:%=%_no_opt_SUITE) diff --git a/erts/emulator/test/guard_SUITE.erl b/erts/emulator/test/guard_SUITE.erl index 23482a20d7..fa311badf1 100644 --- a/erts/emulator/test/guard_SUITE.erl +++ b/erts/emulator/test/guard_SUITE.erl @@ -20,14 +20,15 @@ -module(guard_SUITE). -export([all/1, bad_arith/1, bad_tuple/1, test_heap_guards/1, guard_bifs/1, - type_tests/1]). + type_tests/1,guard_bif_binary_part/1]). -include("test_server.hrl"). -export([init/3]). -import(lists, [member/2]). -all(suite) -> [bad_arith, bad_tuple, test_heap_guards, guard_bifs, type_tests]. +all(suite) -> [bad_arith, bad_tuple, test_heap_guards, guard_bifs, + type_tests, guard_bif_binary_part]. bad_arith(doc) -> "Test that a bad arithmetic operation in a guard works correctly."; bad_arith(Config) when is_list(Config) -> @@ -136,6 +137,161 @@ init(Fun, Args, Filler) -> dummy(_) -> ok. +-define(MASK_ERROR(EXPR),mask_error((catch (EXPR)))). +mask_error({'EXIT',{Err,_}}) -> + Err; +mask_error(Else) -> + Else. + +guard_bif_binary_part(doc) -> + ["Test the binary_part/2,3 guard BIF's extensively"]; +guard_bif_binary_part(Config) when is_list(Config) -> + F = fun(X) -> + Master = self(), + {Pid,Ref} = spawn_monitor( fun() -> + A = lists:duplicate(X,a), + B = [do_binary_part_guard() | A], + Master ! {self(),hd(B)}, + ok + end), + receive + {Pid,ok} -> + erlang:demonitor(Ref,[flush]), + ok; + Error -> + Error + end + end, + [ ok = F(N) || N <- lists:seq(1,10000) ], + ok. 
+ + +do_binary_part_guard() -> + ?line 1 = bptest(<<1,2,3>>), + ?line 2 = bptest(<<2,1,3>>), + ?line error = bptest(<<1>>), + ?line error = bptest(<<>>), + ?line error = bptest(apa), + ?line 3 = bptest(<<2,3,3>>), + % With one variable (pos) + ?line 1 = bptest(<<1,2,3>>,1), + ?line 2 = bptest(<<2,1,3>>,1), + ?line error = bptest(<<1>>,1), + ?line error = bptest(<<>>,1), + ?line error = bptest(apa,1), + ?line 3 = bptest(<<2,3,3>>,1), + % With one variable (length) + ?line 1 = bptesty(<<1,2,3>>,1), + ?line 2 = bptesty(<<2,1,3>>,1), + ?line error = bptesty(<<1>>,1), + ?line error = bptesty(<<>>,1), + ?line error = bptesty(apa,1), + ?line 3 = bptesty(<<2,3,3>>,2), + % With one variable (whole tuple) + ?line 1 = bptestx(<<1,2,3>>,{1,1}), + ?line 2 = bptestx(<<2,1,3>>,{1,1}), + ?line error = bptestx(<<1>>,{1,1}), + ?line error = bptestx(<<>>,{1,1}), + ?line error = bptestx(apa,{1,1}), + ?line 3 = bptestx(<<2,3,3>>,{1,2}), + % With two variables + ?line 1 = bptest(<<1,2,3>>,1,1), + ?line 2 = bptest(<<2,1,3>>,1,1), + ?line error = bptest(<<1>>,1,1), + ?line error = bptest(<<>>,1,1), + ?line error = bptest(apa,1,1), + ?line 3 = bptest(<<2,3,3>>,1,2), + % Direct (autoimported) call, these will be evaluated by the compiler... 
+ ?line <<2>> = binary_part(<<1,2,3>>,1,1), + ?line <<1>> = binary_part(<<2,1,3>>,1,1), + % Compiler warnings due to constant evaluation expected (3) + ?line badarg = ?MASK_ERROR(binary_part(<<1>>,1,1)), + ?line badarg = ?MASK_ERROR(binary_part(<<>>,1,1)), + ?line badarg = ?MASK_ERROR(binary_part(apa,1,1)), + ?line <<3,3>> = binary_part(<<2,3,3>>,1,2), + % Direct call through apply + ?line <<2>> = apply(erlang,binary_part,[<<1,2,3>>,1,1]), + ?line <<1>> = apply(erlang,binary_part,[<<2,1,3>>,1,1]), + % Compiler warnings due to constant evaluation expected (3) + ?line badarg = ?MASK_ERROR(apply(erlang,binary_part,[<<1>>,1,1])), + ?line badarg = ?MASK_ERROR(apply(erlang,binary_part,[<<>>,1,1])), + ?line badarg = ?MASK_ERROR(apply(erlang,binary_part,[apa,1,1])), + ?line <<3,3>> = apply(erlang,binary_part,[<<2,3,3>>,1,2]), + % Constant propagation + ?line Bin = <<1,2,3>>, + ?line ok = if + binary_part(Bin,1,1) =:= <<2>> -> + ok; + %% Compiler warning, clause cannot match (expected) + true -> + error + end, + ?line ok = if + binary_part(Bin,{1,1}) =:= <<2>> -> + ok; + %% Compiler warning, clause cannot match (expected) + true -> + error + end, + ok. + + +bptest(B) when length(B) =:= 1337 -> + 1; +bptest(B) when binary_part(B,{1,1}) =:= <<2>> -> + 1; +bptest(B) when erlang:binary_part(B,1,1) =:= <<1>> -> + 2; +bptest(B) when erlang:binary_part(B,{1,2}) =:= <<3,3>> -> + 3; +bptest(_) -> + error. + +bptest(B,A) when length(B) =:= A -> + 1; +bptest(B,A) when binary_part(B,{A,1}) =:= <<2>> -> + 1; +bptest(B,A) when erlang:binary_part(B,A,1) =:= <<1>> -> + 2; +bptest(B,A) when erlang:binary_part(B,{A,2}) =:= <<3,3>> -> + 3; +bptest(_,_) -> + error. + +bptestx(B,A) when length(B) =:= A -> + 1; +bptestx(B,A) when binary_part(B,A) =:= <<2>> -> + 1; +bptestx(B,A) when erlang:binary_part(B,A) =:= <<1>> -> + 2; +bptestx(B,A) when erlang:binary_part(B,A) =:= <<3,3>> -> + 3; +bptestx(_,_) -> + error. 
+ +bptesty(B,A) when length(B) =:= A -> + 1; +bptesty(B,A) when binary_part(B,{1,A}) =:= <<2>> -> + 1; +bptesty(B,A) when erlang:binary_part(B,1,A) =:= <<1>> -> + 2; +bptesty(B,A) when erlang:binary_part(B,{1,A}) =:= <<3,3>> -> + 3; +bptesty(_,_) -> + error. + +bptest(B,A,_C) when length(B) =:= A -> + 1; +bptest(B,A,C) when binary_part(B,{A,C}) =:= <<2>> -> + 1; +bptest(B,A,C) when erlang:binary_part(B,A,C) =:= <<1>> -> + 2; +bptest(B,A,C) when erlang:binary_part(B,{A,C}) =:= <<3,3>> -> + 3; +bptest(_,_,_) -> + error. + + guard_bifs(doc) -> "Test all guard bifs with nasty (but legal arguments)."; guard_bifs(Config) when is_list(Config) -> ?line Big = -237849247829874297658726487367328971246284736473821617265433, -- cgit v1.2.3 From ababeff98d7def30898786629cc5a16ea3949a24 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 6 May 2010 16:30:26 +0200 Subject: Correct Boyer More and trapping for longest_common_suffix Add testcases and clean up to get better code coverage. --- erts/emulator/beam/erl_bif_binary.c | 76 +++++++++++++++++++++++-------------- erts/emulator/test/guard_SUITE.erl | 9 +++++ 2 files changed, 57 insertions(+), 28 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index ea8ea2e53d..cba17ee6eb 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -260,7 +260,11 @@ static void dump_ac_node(ACNode *node, int indent, int ch); /* * Callback for the magic binary */ -static void cleanup_my_data(Binary *bp) +static void cleanup_my_data_ac(Binary *bp) +{ + return; +} +static void cleanup_my_data_bm(Binary *bp) { return; } @@ -277,7 +281,7 @@ static ACTrie *create_acdata(MyAllocator *my, Uint len, Uint datasize = AC_SIZE(len); ACTrie *act; ACNode *acn; - Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); + Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data_ac); byte *data = ERTS_MAGIC_BIN_DATA(mb); init_my_allocator(my, 
datasize, data); @@ -306,7 +310,7 @@ static BMData *create_bmdata(MyAllocator *my, byte *x, Uint len, { Uint datasize = BM_SIZE(len); BMData *bmd; - Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); + Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data_bm); byte *data = ERTS_MAGIC_BIN_DATA(mb); init_my_allocator(my, datasize, data); bmd = my_alloc(my, sizeof(BMData)); @@ -708,7 +712,7 @@ static void compute_suffixes(byte *x, Sint m, Sint *suffixes) g = m - 1; for (i = m - 2; i >= 0; --i) { - if (i > g && suffixes[i + m - f] < i - g) { + if (i > g && suffixes[i + m - 1 - f] < i - g) { suffixes[i] = suffixes[i + m - 1 - f]; } else { if (i < g) { @@ -731,7 +735,7 @@ static void compute_goodshifts(BMData *bmd) Sint m = bmd->len; byte *x = bmd->x; Sint i, j; - Sint *suffixes = erts_alloc(ERTS_ALC_T_TMP, m * sizeof(Uint)); + Sint *suffixes = erts_alloc(ERTS_ALC_T_TMP, m * sizeof(Sint)); compute_suffixes(x, m, suffixes); @@ -1329,13 +1333,15 @@ static int parse_match_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp) } if (len < 0) { Sint lentmp = -len; - if (-lentmp != len) { + /* overflow */ + if (lentmp == len || lentmp < 0 || -lentmp != len) { goto badarg; } len = lentmp; pos -= len; } - if (((pos + len) - len) != pos) { + /* overflow */ + if ((pos + len) < pos || (len > 0 && (pos + len) == pos)) { goto badarg; } *endp = len + pos; @@ -1367,10 +1373,9 @@ static BIF_RETTYPE binary_match_trap(BIF_ALIST_3) BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3); default: - goto badarg; + /* Cannot badarg in the trap */ + erl_exit(1, "Internal error in binary_match_trap."); } - badarg: - BIF_ERROR(BIF_P,BADARG); } static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3) @@ -1387,10 +1392,9 @@ static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3) BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3); default: - goto badarg; + /* Cannot badarg in the trap */ + erl_exit(1, "Internal error in 
binary_matches_trap."); } - badarg: - BIF_ERROR(BIF_P,BADARG); } @@ -1425,7 +1429,12 @@ BIF_RETTYPE binary_match_3(BIF_ALIST_3) } type = tp[1]; bin = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { + if (type == am_bm && + ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_bm) { + goto badarg; + } + if (type == am_ac && + ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_ac) { goto badarg; } bin_term = tp[2]; @@ -1482,7 +1491,12 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) } type = tp[1]; bin = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { + if (type == am_bm && + ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_bm) { + goto badarg; + } + if (type == am_ac && + ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data_ac) { goto badarg; } bin_term = tp[2]; @@ -1547,7 +1561,8 @@ BIF_RETTYPE erts_binary_part(Process *p, Eterm binary, Eterm epos, Eterm elen) } if (len < 0) { Sint lentmp = -len; - if (-lentmp != len) { + /* overflow */ + if (lentmp == len || lentmp < 0 || -lentmp != len) { goto badarg; } len = lentmp; @@ -1556,7 +1571,8 @@ BIF_RETTYPE erts_binary_part(Process *p, Eterm binary, Eterm epos, Eterm elen) } pos -= len; } - if (((pos + len) - len) != pos) { + /* overflow */ + if ((pos + len) < pos || (len > 0 && (pos + len) == pos)){ goto badarg; } if ((orig_size = binary_size(binary)) < pos || @@ -1634,7 +1650,8 @@ BIF_RETTYPE erts_gc_binary_part(Process *p, Eterm *reg, Eterm live, int range_is } if (len < 0) { Sint lentmp = -len; - if (-lentmp != len) { + /* overflow */ + if (lentmp == len || lentmp < 0 || -lentmp != len) { goto badarg; } len = lentmp; @@ -1643,7 +1660,8 @@ BIF_RETTYPE erts_gc_binary_part(Process *p, Eterm *reg, Eterm live, int range_is } pos -= len; } - if (((pos + len) - len) != pos) { + /* overflow */ + if ((pos + len) < pos || (len > 0 && (pos + len) == pos)) { goto badarg; } if ((orig_size = binary_size(binary)) < pos || @@ -1926,6 +1944,7 @@ 
static BIF_RETTYPE do_longest_common(Process *p, Eterm list, int direction) unsigned char *tmp = cd[i].buff; cd[i].buff = erts_alloc(ERTS_ALC_T_BINARY_BUFFER, cd[i].bufflen); memcpy(cd[i].buff,tmp,cd[i].bufflen); + cd[i].type = CL_TYPE_HEAP; } } hp = HAlloc(p, PROC_BIN_SIZE); @@ -1975,7 +1994,7 @@ static BIF_RETTYPE do_longest_common_trap(Process *p, Eterm bin_term, Eterm curr ASSERT(res == CL_RESTART); /* Copy all heap binaries that are not already copied (aligned) */ BUMP_ALL_REDS(p); - BIF_TRAP3(&binary_longest_prefix_trap_export, p, bin_term, epos, orig_list); + BIF_TRAP3(trapper, p, bin_term, epos, orig_list); } } @@ -2104,16 +2123,15 @@ BIF_RETTYPE binary_at_2(BIF_ALIST_2) static int do_bin_to_list(Process *p, byte *bytes, Uint bit_offs, Uint start, Sint *lenp, Eterm *termp) { - Uint reds = get_reds(p, BIN_TO_LIST_LOOP_FACTOR); + Uint reds = get_reds(p, BIN_TO_LIST_LOOP_FACTOR); /* reds can never be 0 */ Uint len = *lenp; Uint loops; Eterm *hp; Eterm term = *termp; Uint n; - if (reds == 0) { - return BIN_TO_LIST_TRAP; - } + ASSERT(reds > 0); + loops = MIN(reds,len); BUMP_REDS(p, loops / BIN_TO_LIST_LOOP_FACTOR); @@ -2170,8 +2188,8 @@ static BIF_RETTYPE binary_bin_to_list_trap(BIF_ALIST_3) len = (Sint) ptr[2]; ERTS_GET_BINARY_BYTES(BIF_ARG_1,bytes,bit_offs,bit_size); - if(do_bin_to_list(BIF_P, bytes, bit_offs, start, &len, &res) == - BIN_TO_LIST_OK) { + if (do_bin_to_list(BIF_P, bytes, bit_offs, start, &len, &res) == + BIN_TO_LIST_OK) { BIF_RET(res); } return do_trap_bin_to_list(BIF_P,BIF_ARG_1,start,len,res); @@ -2201,7 +2219,8 @@ static BIF_RETTYPE binary_bin_to_list_common(Process *p, } if (len < 0) { Sint lentmp = -len; - if (-lentmp != len) { + /* overflow */ + if (lentmp == len || lentmp < 0 || -lentmp != len) { goto badarg; } len = lentmp; @@ -2210,7 +2229,8 @@ static BIF_RETTYPE binary_bin_to_list_common(Process *p, } pos -= len; } - if (((pos + len) - len) != pos) { + /* overflow */ + if ((pos + len) < pos || (len > 0 && (pos + len) == pos)) { goto 
badarg; } sz = binary_size(bin); diff --git a/erts/emulator/test/guard_SUITE.erl b/erts/emulator/test/guard_SUITE.erl index fa311badf1..62de615a90 100644 --- a/erts/emulator/test/guard_SUITE.erl +++ b/erts/emulator/test/guard_SUITE.erl @@ -146,6 +146,15 @@ mask_error(Else) -> guard_bif_binary_part(doc) -> ["Test the binary_part/2,3 guard BIF's extensively"]; guard_bif_binary_part(Config) when is_list(Config) -> + %% Overflow tests that need to be unoptimized + ?line badarg = + ?MASK_ERROR( + binary_part(<<1,2,3>>,{16#FFFFFFFFFFFFFFFF, + -16#7FFFFFFFFFFFFFFF-1})), + ?line badarg = + ?MASK_ERROR( + binary_part(<<1,2,3>>,{16#FFFFFFFFFFFFFFFF, + 16#7FFFFFFFFFFFFFFF})), F = fun(X) -> Master = self(), {Pid,Ref} = spawn_monitor( fun() -> -- cgit v1.2.3 From dce00e268eb36048a729db9a2a9aebd4df0e7395 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Fri, 7 May 2010 09:33:56 +0200 Subject: Add more tests and make some go easier on small systems Even more tests added to cover more error cases, some tests made easier on systems with small memories and timeouts made longer due to timetraps on solaris daily builds. 
--- erts/emulator/beam/erl_bif_binary.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index cba17ee6eb..0a40e28474 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -1365,16 +1365,12 @@ static BIF_RETTYPE binary_match_trap(BIF_ALIST_3) Eterm result; Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; runres = do_binary_match(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result); - switch (runres) { - case DO_BIN_MATCH_OK: + if (runres == DO_BIN_MATCH_OK) { BIF_RET(result); - case DO_BIN_MATCH_RESTART: + } else { BUMP_ALL_REDS(BIF_P); BIF_TRAP3(&binary_match_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3); - default: - /* Cannot badarg in the trap */ - erl_exit(1, "Internal error in binary_match_trap."); } } @@ -1384,20 +1380,15 @@ static BIF_RETTYPE binary_matches_trap(BIF_ALIST_3) Eterm result; Binary *bin = ((ProcBin *) binary_val(BIF_ARG_3))->val; runres = do_binary_matches(BIF_P,BIF_ARG_1,0,0,NIL,bin,BIF_ARG_2,&result); - switch (runres) { - case DO_BIN_MATCH_OK: + if (runres == DO_BIN_MATCH_OK) { BIF_RET(result); - case DO_BIN_MATCH_RESTART: + } else { BUMP_ALL_REDS(BIF_P); BIF_TRAP3(&binary_matches_trap_export, BIF_P, BIF_ARG_1, result, BIF_ARG_3); - default: - /* Cannot badarg in the trap */ - erl_exit(1, "Internal error in binary_matches_trap."); } } - BIF_RETTYPE binary_match_3(BIF_ALIST_3) { Uint hsstart; -- cgit v1.2.3 From 97ab480df55cf574ab42a87b6927ef5bba83000e Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 10 May 2010 16:27:58 +0200 Subject: Add documentation for binary module Correct behaviour of copy/2 witn 0 copies. 
--- erts/doc/src/erlang.xml | 53 +++++++++++++++++++++++++++++++++++++ erts/emulator/beam/erl_bif_binary.c | 3 ++- 2 files changed, 55 insertions(+), 1 deletion(-) (limited to 'erts') diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index cd9bb85f5c..e90160dfd7 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -252,6 +252,54 @@ iolist() = [char() | binary() | iolist()] "Erlang" + + binary_part(Subject, PosLen) -> binary() + Extracts a part of a binary + + Subject = binary() + PosLen = {Start,Length} + Start = int() + Length = int() + + +

Extracts the part of the binary described by PosLen.

+ +

Negative length can be used to extract bytes at the end of a binary:

+ + +1> Bin = <<1,2,3,4,5,6,7,8,9,10>>. +2> binary_part(Bin,{byte_size(Bin), -5}). +<<6,7,8,9,10>> + +

If PosLen in any way references outside the binary, a badarg exception is raised.

+ +

Start is zero-based, i.e.:

+ +1> Bin = <<1,2,3>>. +2> binary_part(Bin,{0,2}). +<<1,2>> + +

See the STDLIB module binary for details about the PosLen semantics.

+ +

Allowed in guard tests.

+
+
+ + binary_part(Subject, Start, Length) -> binary() + Extracts a part of a binary + + Subject = binary() + Start = int() + Length = int() + + +

The same as binary_part(Subject, {Start, Length}).

+ +

Allowed in guard tests.

+
+
binary_to_atom(Binary, Encoding) -> atom() Convert from text representation to an atom @@ -318,6 +366,11 @@ iolist() = [char() | binary() | iolist()] corresponding to the bytes from position Start to position Stop in Binary. Positions in the binary are numbered starting from 1.

+ +

This function's indexing style of using one-based indices for + binaries is deprecated. New code should use the functions in + the STDLIB module binary instead. They consistently + use the same (zero-based) style of indexing.

diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index 0a40e28474..3e8480324c 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -2369,7 +2369,8 @@ static BIF_RETTYPE do_binary_copy(Process *p, Eterm bin, Eterm en) goto badarg; } if (!n) { - goto badarg; + Eterm res_term = erts_new_heap_binary(p,NULL,0,&bytes); + BIF_RET(res_term); } ERTS_GET_BINARY_BYTES(bin,bytes,bit_offs,bit_size); if (bit_size != 0) { -- cgit v1.2.3