From e0c4c2867c20368c5b5d88cbbf92da7b7a3f386e Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 2 Nov 2009 15:55:20 +0100 Subject: Teach binary.c the semantics to take longest instead of shortest match Add testcase embryos and reference implementation. Change name of compile function according to EEP31. --- erts/emulator/beam/bif.tab | 2 +- erts/emulator/beam/binary.c | 411 +++++++++++++++------------- lib/stdlib/src/binary.erl | 4 +- lib/stdlib/test/binary_module_SUITE.erl | 96 +++++++ lib/stdlib/test/binref.erl | 456 ++++++++++++++++++++++++++++++++ 5 files changed, 779 insertions(+), 190 deletions(-) create mode 100644 lib/stdlib/test/binary_module_SUITE.erl create mode 100644 lib/stdlib/test/binref.erl diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 8d81aa7eba..3f51e6dc45 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -760,7 +760,7 @@ bif erlang:finish_after_on_load/2 # # The searching/splitting/substituting thingies # -bif binary:match_compile/1 +bif binary:compile_pattern/1 bif binary:match/2 bif binary:match/3 bif binary:matches/2 diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c index 29c1af2114..4662e60d51 100644 --- a/erts/emulator/beam/binary.c +++ b/erts/emulator/beam/binary.c @@ -802,13 +802,12 @@ static void cleanup_my_data(Binary *bp) * Initiate a (allocated) micro allocator and fill in the base * for an Aho-Corasick search trie, given the accumulated length of the search strings. */ -static ACTrie *create_acdata(MyAllocator *my, Process *p, Uint len, - ACNode ***qbuff /* out */,Eterm *the_bin /* out */) +static ACTrie *create_acdata(MyAllocator *my, Uint len, + ACNode ***qbuff /* out */,Binary **the_bin /* out */) { Uint datasize = AC_SIZE(len); ACTrie *act; ACNode *acn; - Eterm *hp; Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); byte *data = ERTS_MAGIC_BIN_DATA(mb); @@ -826,19 +825,17 @@ static ACTrie *create_acdata(MyAllocator *my, Process *p, Uint len, acn->id = 0; #endif *qbuff = erts_alloc(ERTS_ALC_T_TMP, sizeof(ACNode *) * len); - hp = HAlloc(p, PROC_BIN_SIZE); - *the_bin = erts_mk_magic_binary_term(&hp, &MSO(p), mb); + *the_bin = mb; return act; } /* * The same initialization of allocator and basic data for Boyer-More. */ -static BMData *create_bmdata(MyAllocator *my, Process *p, byte *x, Uint len, Eterm *the_bin /* out */) +static BMData *create_bmdata(MyAllocator *my, byte *x, Uint len, Binary **the_bin /* out */) { Uint datasize = BM_SIZE(len); BMData *bmd; - Eterm *hp; Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data); byte *data = ERTS_MAGIC_BIN_DATA(mb); init_my_allocator(my, datasize, data); @@ -848,8 +845,7 @@ static BMData *create_bmdata(MyAllocator *my, Process *p, byte *x, Uint len, Ete bmd->len = len; bmd->goodshift = my_alloc(my,sizeof(Uint) * len); bmd->ret_tuple = 0; - hp = HAlloc(p, PROC_BIN_SIZE); - *the_bin = erts_mk_magic_binary_term(&hp, &MSO(p), mb); + *the_bin = mb; return bmd; } @@ -998,7 +994,8 @@ static Uint ac_find_first_match(ACTrie *act, byte *haystack, Uint len, while (r->final < 0) r = r->h; rstart = i - r->d; - if (candidate == NULL || rstart < candidate_start) { + if (candidate == NULL || rstart < candidate_start || + (rstart == candidate_start && candidate->d < q->d)) { candidate_start = rstart; candidate = r; } @@ -1063,12 +1060,21 @@ static Uint ac_find_all_non_overlapping(ACTrie *act, byte *haystack, Uint len, #endif rstart = i - r->d; save_m = m; - while (m > 0 && out[m-1].pos > rstart) { + while (m > 0 && (out[m-1].pos > rstart || + (out[m-1].pos == rstart && + out[m-1].len < r->d))) { #ifdef HARDDEBUG erts_printf("Popping %u\n",(unsigned) out[m-1].id); #endif --m; } +#ifdef HARDDEBUG + if (m > 0) { + erts_printf("Pos %u\n",out[m-1].pos); + erts_printf("Len %u\n",out[m-1].len); + } + erts_printf("Rstart %u\n",rstart); +#endif if (m == 0 || out[m-1].pos + out[m-1].len <= rstart) { if (m >= allocated) { if (!allocated) { @@ -1213,7 +1219,8 @@ static Sint bm_find_first_match(BMData *bmd, byte *haystack, Uint len) /* * Search functionality interfaces */ -BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) + +static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp) { Eterm t, b, comp_term = NIL; Uint characters; @@ -1223,9 +1230,9 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) characters = 0; words = 0; - if (is_list(BIF_ARG_1)) { + if (is_list(argument)) { return_tuple = 1; - t = BIF_ARG_1; + t = argument; while (is_list(t)) { b = CAR(list_val(t)); t = CDR(list_val(t)); @@ -1242,17 +1249,17 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) goto badarg; } if (words > 1) { - comp_term = BIF_ARG_1; + comp_term = argument; } else { - comp_term = CAR(list_val(BIF_ARG_1)); + comp_term = CAR(list_val(argument)); } - } else if (is_binary(BIF_ARG_1)) { - if (binary_bitsize(BIF_ARG_1) != 0) { + } else if (is_binary(argument)) { + if (binary_bitsize(argument) != 0) { goto badarg; } words = 1; - comp_term = BIF_ARG_1; - characters = binary_size(BIF_ARG_1); + comp_term = argument; + characters = binary_size(argument); } if (characters == 0) { @@ -1261,35 +1268,33 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) ASSERT(words > 0); if (words == 1) { - Eterm ret; byte *bytes; Uint bitoffs, bitsize; byte *temp_alloc = NULL; MyAllocator my; BMData *bmd; - Eterm *hp; + Binary *bin; ERTS_GET_BINARY_BYTES(comp_term, bytes, bitoffs, bitsize); if (bitoffs != 0) { bytes = erts_get_aligned_binary_bytes(comp_term, &temp_alloc); } - bmd = create_bmdata(&my, BIF_P, bytes, characters, &ret); + bmd = create_bmdata(&my, bytes, characters, &bin); bmd->ret_tuple = return_tuple; compute_badshifts(bmd); compute_goodshifts(bmd); erts_free_aligned_binary_bytes(temp_alloc); CHECK_ALLOCATOR(my); - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, am_bm, ret); - BIF_RET(ret); + *tag = am_bm; + *binp = bin; + return 0; } else { - Eterm ret; ACTrie *act; MyAllocator my; - Eterm *hp; ACNode **qbuff; + Binary *bin; - act = create_acdata(&my, BIF_P, characters, &qbuff, &ret); + act = create_acdata(&my, characters, &qbuff, &bin); t = comp_term; while (is_list(t)) { byte *bytes; @@ -1307,18 +1312,37 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1) ac_compute_failure_functions(act,qbuff); CHECK_ALLOCATOR(my); erts_free(ERTS_ALC_T_TMP,qbuff); - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, am_ac, ret); - BIF_RET(ret); + *tag = am_ac; + *binp = bin; + return 0; } badarg: - BIF_ERROR(BIF_P,BADARG); + return -1; +} + +BIF_RETTYPE binary_compile_pattern_1(BIF_ALIST_1) +{ + Binary *bin; + Eterm tag, ret; + Eterm *hp; + + if (do_binary_match_compile(BIF_ARG_1,&tag,&bin)) { + BIF_ERROR(BIF_P,BADARG); + } + hp = HAlloc(BIF_P, PROC_BIN_SIZE+3); + ret = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin); + ret = TUPLE2(hp, tag, ret); + BIF_RET(ret); } + BIF_RETTYPE binary_match_3(BIF_ALIST_3) { Uint hsstart, hslen; Eterm *tp; + Eterm type; + Binary *bin; + Eterm bin_term = NIL; if (is_not_binary(BIF_ARG_1)) { goto badarg; } @@ -1355,85 +1379,90 @@ BIF_RETTYPE binary_match_3(BIF_ALIST_3) if (arityval(*tp) != 2 || is_not_atom(tp[1])) { goto badarg; } - if (tp[1] == am_bm && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - Binary *mbp; - BMData *bm; - Sint pos; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret; - Eterm *hp; - mbp = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { - goto badarg; - } - bm = (BMData *) ERTS_MAGIC_BIN_DATA(mbp); + if (((tp[1] != am_bm) && (tp[1] != am_ac)) || + !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + goto badarg; + } + type = tp[1]; + bin = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { + goto badarg; + } + bin_term = tp[2]; + } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { + goto badarg; + } + + if (type == am_bm) { + BMData *bm; + Sint pos; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG - dump_bm_data(bm); + dump_bm_data(bm); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - pos = bm_find_first_match(bm, bytes + hsstart, hslen); - if (pos < 0) { - ret = am_nomatch; - } else { - Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); - ret = erts_make_integer(pos+hsstart,BIF_P); - if (bm->ret_tuple) { - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, ret, erlen); - } + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + pos = bm_find_first_match(bm, bytes + hsstart, hslen); + if (pos < 0) { + ret = am_nomatch; + } else { + Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); + ret = erts_make_integer(pos+hsstart,BIF_P); + if (bm->ret_tuple) { + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, ret, erlen); } - erts_free_aligned_binary_bytes(temp_alloc); - BIF_RET(ret); - } else if (tp[1] == am_ac && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - Binary *mbp; - ACTrie *act; - Uint pos, msn,rlen; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret; - Eterm *hp; + } + erts_free_aligned_binary_bytes(temp_alloc); + if (bin_term == NIL) { + erts_bin_free(bin); + } + BIF_RET(ret); + } else if (type == am_ac) { + ACTrie *act; + Uint pos, msn,rlen; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; - mbp = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { - goto badarg; - } - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(mbp); + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG - dump_ac_trie(act); + dump_ac_trie(act); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - msn = ac_find_first_match(act, bytes + hsstart, - hslen, &pos, &rlen); - if (msn == 0) { - ret = am_nomatch; - } else { - Eterm epos = erts_make_integer(pos+hsstart,BIF_P); - Eterm erlen = erts_make_integer(rlen,BIF_P); - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, epos, erlen); - } - erts_free_aligned_binary_bytes(temp_alloc); - BIF_RET(ret); - } else { + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { goto badarg; } - } else { - goto badarg; /* Compilation on the fly NYI */ + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + msn = ac_find_first_match(act, bytes + hsstart, + hslen, &pos, &rlen); + if (msn == 0) { + ret = am_nomatch; + } else { + Eterm epos = erts_make_integer(pos+hsstart,BIF_P); + Eterm erlen = erts_make_integer(rlen,BIF_P); + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, epos, erlen); + } + erts_free_aligned_binary_bytes(temp_alloc); + if (bin_term == NIL) { + erts_bin_free(bin); + } + BIF_RET(ret); } badarg: BIF_ERROR(BIF_P,BADARG); @@ -1447,6 +1476,9 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) { Uint hsstart, hslen; Eterm *tp; + Eterm type; + Binary *bin; + Eterm bin_term = NIL; if (is_not_binary(BIF_ARG_1)) { goto badarg; } @@ -1483,98 +1515,103 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3) if (arityval(*tp) != 2 || is_not_atom(tp[1])) { goto badarg; } - if (tp[1] == am_bm && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - Binary *mbp; - BMData *bm; - Sint pos; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret; - Eterm *hp; - mbp = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { - goto badarg; - } - bm = (BMData *) ERTS_MAGIC_BIN_DATA(mbp); + if (((tp[1] != am_bm) && (tp[1] != am_ac)) || + !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { + goto badarg; + } + type = tp[1]; + bin = ((ProcBin *) binary_val(tp[2]))->val; + if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) { + goto badarg; + } + bin_term = tp[2]; + } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) { + goto badarg; + } + + if (type == am_bm) { + BMData *bm; + Sint pos; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret; + Eterm *hp; + bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG - dump_bm_data(bm); + dump_bm_data(bm); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - pos = bm_find_first_match(bm, bytes + hsstart, hslen); - if (pos < 0) { - ret = am_nomatch; - } else { - Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); - ret = erts_make_integer(pos,BIF_P); - if (bm->ret_tuple) { - hp = HAlloc(BIF_P,3); - ret = TUPLE2(hp, ret, erlen); - } + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + pos = bm_find_first_match(bm, bytes + hsstart, hslen); + if (pos < 0) { + ret = am_nomatch; + } else { + Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P); + ret = erts_make_integer(pos,BIF_P); + if (bm->ret_tuple) { + hp = HAlloc(BIF_P,3); + ret = TUPLE2(hp, ret, erlen); } - erts_free_aligned_binary_bytes(temp_alloc); - BIF_RET(ret); - } else if (tp[1] == am_ac && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) { - Binary *mbp; - ACTrie *act; - Uint rlen; - Sint i; - FindallData *fad; - byte *bytes; - Uint bitoffs, bitsize; - byte *temp_alloc = NULL; - Eterm ret,tpl; - Eterm *hp; + } + erts_free_aligned_binary_bytes(temp_alloc); + if (bin_term == NIL) { + erts_bin_free(bin); + } + BIF_RET(ret); + } else if (type == am_ac) { + ACTrie *act; + Uint rlen; + Sint i; + FindallData *fad; + byte *bytes; + Uint bitoffs, bitsize; + byte *temp_alloc = NULL; + Eterm ret,tpl; + Eterm *hp; - mbp = ((ProcBin *) binary_val(tp[2]))->val; - if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) { - goto badarg; - } - act = (ACTrie *) ERTS_MAGIC_BIN_DATA(mbp); + act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin); #ifdef HARDDEBUG - dump_ac_trie(act); + dump_ac_trie(act); #endif - ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); - if (bitsize != 0) { - goto badarg; - } - if (bitoffs != 0) { - bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); - } - rlen = ac_find_all_non_overlapping(act, bytes + hsstart, - hslen, &fad); - if (rlen == 0) { - ret = am_nomatch; - } else { - for (i = 0; i < rlen; ++i) { - fad[i].epos = erts_make_integer(fad[i].pos,BIF_P); - fad[i].elen = erts_make_integer(fad[i].len,BIF_P); - } - hp = HAlloc(BIF_P,rlen * (3 + 2)); - ret = NIL; - for (i = rlen - 1; i >= 0; --i) { - tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); - hp +=3; - ret = CONS(hp,tpl,ret); - hp += 2; - } + ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize); + if (bitsize != 0) { + goto badarg; + } + if (bitoffs != 0) { + bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc); + } + rlen = ac_find_all_non_overlapping(act, bytes + hsstart, + hslen, &fad); + if (rlen == 0) { + ret = am_nomatch; + } else { + for (i = 0; i < rlen; ++i) { + fad[i].epos = erts_make_integer(fad[i].pos,BIF_P); + fad[i].elen = erts_make_integer(fad[i].len,BIF_P); } - erts_free_aligned_binary_bytes(temp_alloc); - if (fad != NULL) { - erts_free(ERTS_ALC_T_TMP,fad); + hp = HAlloc(BIF_P,rlen * (3 + 2)); + ret = NIL; + for (i = rlen - 1; i >= 0; --i) { + tpl = TUPLE2(hp, fad[i].epos, fad[i].elen); + hp +=3; + ret = CONS(hp,tpl,ret); + hp += 2; } - BIF_RET(ret); - } else { - goto badarg; } - } else { - goto badarg; /* Compilation on the fly NYI */ + erts_free_aligned_binary_bytes(temp_alloc); + if (fad != NULL) { + erts_free(ERTS_ALC_T_TMP,fad); + } + if (bin_term == NIL) { + erts_bin_free(bin); + } + BIF_RET(ret); } badarg: BIF_ERROR(BIF_P,BADARG); diff --git a/lib/stdlib/src/binary.erl b/lib/stdlib/src/binary.erl index f2774f8c84..d649f974bb 100644 --- a/lib/stdlib/src/binary.erl +++ b/lib/stdlib/src/binary.erl @@ -19,8 +19,8 @@ -module(binary). %% %% The following functions implemented as BIF's -%% binary:match_compile/1 -%% binary:match/3 +%% binary:compile_pattern/1 +%% binary:find/3 %% XXX:PaN more to come... -export([first/1,first/2,last/1,last/2,nth/2,extract/3]). diff --git a/lib/stdlib/test/binary_module_SUITE.erl b/lib/stdlib/test/binary_module_SUITE.erl new file mode 100644 index 0000000000..31d0812cfc --- /dev/null +++ b/lib/stdlib/test/binary_module_SUITE.erl @@ -0,0 +1,96 @@ +-module(binary_module_SUITE). + +-export([all/1, interesting/1]). + +-define(STANDALONE,1). + +-ifdef(STANDALONE). + +-define(line,erlang:display({?MODULE,?LINE}),). + +-else. + +-include("test_server.hrl"). + +-endif. + + + +-ifdef(STANDALONE). +-export([run/0]). + +run() -> + [ apply(?MODULE,X,[[]]) || X <- all(suite) ]. + +-endif. + +all(suite) -> [interesting]. + + +interesting(doc) -> + ["Try some interesting patterns"]; +interesting(Config) when is_list(Config) -> + X = do_interesting(binary), + X = do_interesting(binref). + +do_interesting(Module) -> + ?line {0,4} = Module:match(<<"123456">>, + Module:compile_pattern([<<"12">>,<<"1234">>, + <<"23">>,<<"3">>, + <<"34">>,<<"456">>, + <<"45">>,<<"6">>])), + ?line [{0,4},{5,1}] = Module:matches(<<"123456">>, + Module:compile_pattern([<<"12">>,<<"1234">>, + <<"23">>,<<"3">>, + <<"34">>,<<"456">>, + <<"45">>,<<"6">>])), + ?line [{0,4}] = Module:matches(<<"123456">>, + Module:compile_pattern([<<"12">>,<<"1234">>, + <<"23">>,<<"3">>, + <<"34">>,<<"456">>, + <<"45">>])), + ?line [{0,2},{2,2}] = Module:matches(<<"123456">>, + Module:compile_pattern([<<"12">>, + <<"23">>,<<"3">>, + <<"34">>,<<"456">>, + <<"45">>])), + ?line {1,4} = Module:match(<<"123456">>, + Module:compile_pattern([<<"34">>,<<"34">>, + <<"12347">>,<<"2345">>])), + ?line [{1,4}] = Module:matches(<<"123456">>, + Module:compile_pattern([<<"34">>,<<"34">>, + <<"12347">>,<<"2345">>])), + ?line [{2,2}] = Module:matches(<<"123456">>, + Module:compile_pattern([<<"34">>,<<"34">>, + <<"12347">>,<<"2346">>])), + + ?line {0,4} = Module:match(<<"123456">>, + [<<"12">>,<<"1234">>, + <<"23">>,<<"3">>, + <<"34">>,<<"456">>, + <<"45">>,<<"6">>]), + ?line [{0,4},{5,1}] = Module:matches(<<"123456">>, + [<<"12">>,<<"1234">>, + <<"23">>,<<"3">>, + <<"34">>,<<"456">>, + <<"45">>,<<"6">>]), + ?line [{0,4}] = Module:matches(<<"123456">>, + [<<"12">>,<<"1234">>, + <<"23">>,<<"3">>, + <<"34">>,<<"456">>, + <<"45">>]), + ?line [{0,2},{2,2}] = Module:matches(<<"123456">>, + [<<"12">>, + <<"23">>,<<"3">>, + <<"34">>,<<"456">>, + <<"45">>]), + ?line {1,4} = Module:match(<<"123456">>, + [<<"34">>,<<"34">>, + <<"12347">>,<<"2345">>]), + ?line [{1,4}] = Module:matches(<<"123456">>, + [<<"34">>,<<"34">>, + <<"12347">>,<<"2345">>]), + ?line [{2,2}] = Module:matches(<<"123456">>, + [<<"34">>,<<"34">>, + <<"12347">>,<<"2346">>]), + ok. diff --git a/lib/stdlib/test/binref.erl b/lib/stdlib/test/binref.erl new file mode 100644 index 0000000000..270796c8f9 --- /dev/null +++ b/lib/stdlib/test/binref.erl @@ -0,0 +1,456 @@ +-module(binref). + +-export([compile_pattern/1,match/2,match/3,matches/2,matches/3, + split/2,split/3,replace/3,replace/4,first/1,last/1,at/2, + part/2,part/3,copy/1,copy/2,encode_unsigned/1,encode_unsigned/2, + decode_unsigned/1,decode_unsigned/2,referenced_byte_size/1]). + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% compile_pattern, a dummy +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +compile_pattern(Pattern) when is_binary(Pattern) -> + {[Pattern]}; +compile_pattern(Pattern) -> + try + [ true = is_binary(P) || P <- Pattern ], + {Pattern} + catch + _:_ -> + erlang:error(badarg) + end. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% match and matches +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +match(H,N) -> + match(H,N,[]). +match(Haystack,Needle,Options) when is_binary(Needle) -> + match(Haystack,[Needle],Options); +match(Haystack,{Needles},Options) -> + match(Haystack,Needles,Options); +match(Haystack,Needles,Options) -> + try + true = is_binary(Haystack) and is_list(Needles), % badarg, not function_clause + case get_opts_match(Options,no) of + no -> + mloop(Haystack,Needles); + {A,B} when B > 0 -> + <<_:A/binary,SubStack:B/binary,_/binary>> = Haystack, + mloop(SubStack,Needles,A,B+A); + {A,B} when B < 0 -> + Start = A + B, + Len = -B, + <<_:Start/binary,SubStack:Len/binary,_/binary>> = Haystack, + mloop(SubStack,Needles,Start,Len+Start); + _ -> + no + end + catch + _:_ -> + erlang:error(badarg) + end. +matches(H,N) -> + matches(H,N,[]). +matches(Haystack,Needle,Options) when is_binary(Needle) -> + matches(Haystack,[Needle],Options); +matches(Haystack,{Needles},Options) -> + matches(Haystack,Needles,Options); +matches(Haystack,Needles,Options) -> + try + true = is_binary(Haystack) and is_list(Needles), % badarg, not function_clause + case get_opts_match(Options,no) of + no -> + msloop(Haystack,Needles); + {A,B} when B > 0 -> + <<_:A/binary,SubStack:B/binary,_/binary>> = Haystack, + msloop(SubStack,Needles,A,B+A); + {A,B} when B < 0 -> + Start = A + B, + Len = -B, + <<_:Start/binary,SubStack:Len/binary,_/binary>> = Haystack, + msloop(SubStack,Needles,Start,Len+Start); + _ -> + [] + end + catch + _:_ -> + erlang:error(badarg) + end. + +mloop(Haystack,Needles) -> + mloop(Haystack,Needles,0,byte_size(Haystack)). + +mloop(_Haystack,_Needles,N,M) when N >= M -> + no; +mloop(Haystack,Needles,N,M) -> + case mloop2(Haystack,Needles,N,no) of + no -> + % Not found + <<_:8,NewStack/binary>> = Haystack, + mloop(NewStack,Needles,N+1,M); + {N,Len} -> + {N,Len} + end. + +msloop(Haystack,Needles) -> + msloop(Haystack,Needles,0,byte_size(Haystack)). + +msloop(_Haystack,_Needles,N,M) when N >= M -> + []; +msloop(Haystack,Needles,N,M) -> + case mloop2(Haystack,Needles,N,no) of + no -> + % Not found + <<_:8,NewStack/binary>> = Haystack, + msloop(NewStack,Needles,N+1,M); + {N,Len} -> + NewN = N+Len, + if + NewN >= M -> + [{N,Len}]; + true -> + <<_:Len/binary,NewStack/binary>> = Haystack, + [{N,Len} | msloop(NewStack,Needles,NewN,M)] + end + end. + +mloop2(_Haystack,[],_N,Res) -> + Res; +mloop2(Haystack,[Needle|Tail],N,Candidate) -> + NS = byte_size(Needle), + case Haystack of + <> -> + NewCandidate = case Candidate of + no -> + {N,NS}; + {N,ONS} when ONS < NS -> + {N,NS}; + Better -> + Better + end, + mloop2(Haystack,Tail,N,NewCandidate); + _ -> + mloop2(Haystack,Tail,N,Candidate) + end. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% split +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +split(H,N) -> + split(H,N,[]). +split(Haystack,{Needles},Options) -> + split(Haystack, Needles, Options); +split(Haystack,Needles,Options) -> + try + {Part,Global,Trim} = get_opts_split(Options,{no,false,false}), + {Start,End,NewStack} = + case Part of + no -> + {0,byte_size(Haystack),Haystack}; + {A,B} when B >= 0 -> + <<_:A/binary,SubStack:B/binary,_/binary>> = Haystack, + {A,A+B,SubStack}; + {A,B} when B < 0 -> + S = A + B, + L = -B, + <<_:S/binary,SubStack:L/binary,_/binary>> = Haystack, + {S,S+L,SubStack} + end, + MList = if + Global -> + msloop(NewStack,Needles,Start,End); + true -> + case mloop(NewStack,Needles,Start,End) of + no -> + []; + X -> + [X] + end + end, + do_split(Haystack,MList,0,Trim) + catch + _:_ -> + erlang:error(badarg) + end. + +do_split(H,[],N,true) when N >= byte_size(H) -> + []; +do_split(H,[],N,_) -> + [part(H,{N,byte_size(H)-N})]; +do_split(H,[{A,B}|T],N,Trim) -> + case part(H,{N,A-N}) of + <<>> -> + Rest = do_split(H,T,A+B,Trim), + case {Trim, Rest} of + {true,[]} -> + []; + _ -> + [<<>> | Rest] + end; + Oth -> + [Oth | do_split(H,T,A+B,Trim)] + end. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% replace +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +replace(H,N,R) -> + replace(H,N,R,[]). +replace(Haystack,{Needles},Replacement,Options) -> + replace(Haystack,Needles,Replacement,Options); + +replace(Haystack,Needles,Replacement,Options) -> + try + true = is_binary(Replacement), % Make badarg instead of function clause + {Part,Global,Insert} = get_opts_replace(Options,{no,false,[]}), + {Start,End,NewStack} = + case Part of + no -> + {0,byte_size(Haystack),Haystack}; + {A,B} when B >= 0 -> + <<_:A/binary,SubStack:B/binary,_/binary>> = Haystack, + {A,A+B,SubStack}; + {A,B} when B < 0 -> + S = A + B, + L = -B, + <<_:S/binary,SubStack:L/binary,_/binary>> = Haystack, + {S,S+L,SubStack} + end, + MList = if + Global -> + msloop(NewStack,Needles,Start,End); + true -> + case mloop(NewStack,Needles,Start,End) of + no -> + []; + X -> + [X] + end + end, + ReplList = case Insert of + [] -> + Replacement; + Y when is_integer(Y) -> + splitat(Replacement,0,[Y]); + Li when is_list(Li) -> + splitat(Replacement,0,lists:sort(Li)) + end, + erlang:iolist_to_binary(do_replace(Haystack,MList,ReplList,0)) + catch + _:_ -> + erlang:error(badarg) + end. + + +do_replace(H,[],_,N) -> + [part(H,{N,byte_size(H)-N})]; +do_replace(H,[{A,B}|T],Replacement,N) -> + [part(H,{N,A-N}), + if + is_list(Replacement) -> + do_insert(Replacement, part(H,{A,B})); + true -> + Replacement + end + | do_replace(H,T,Replacement,A+B)]. + +do_insert([X],_) -> + [X]; +do_insert([H|T],R) -> + [H,R|do_insert(T,R)]. + +splitat(H,N,[]) -> + [part(H,{N,byte_size(H)-N})]; +splitat(H,N,[I|T]) -> + [part(H,{N,I-N})|splitat(H,I,T)]. + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% first, last and at +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +first(Subject) -> + try + <> = Subject, + A + catch + _:_ -> + erlang:error(badarg) + end. + +last(Subject) -> + try + N = byte_size(Subject) - 1, + <<_:N/binary,A:8>> = Subject, + A + catch + _:_ -> + erlang:error(badarg) + end. + +at(Subject,X) -> + try + <<_:X/binary,A:8,_/binary>> = Subject, + A + catch + _:_ -> + erlang:error(badarg) + end. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% part +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +part(Subject,Part) -> + try + do_part(Subject,Part) + catch + _:_ -> + erlang:error(badarg) + end. + +part(Subject,Pos,Len) -> + part(Subject,{Pos,Len}). + +do_part(Bin,{A,B}) when B >= 0 -> + <<_:A/binary,Sub:B/binary,_/binary>> = Bin, + Sub; +do_part(Bin,{A,B}) when B < 0 -> + S = A + B, + L = -B, + <<_:S/binary,Sub:L/binary,_/binary>> = Bin, + Sub. + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% copy +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +copy(Subject) -> + copy(Subject,1). +copy(Subject,N) -> + try + true = is_integer(N) and (N > 0) and is_binary(Subject), % Badarg, not function clause + erlang:list_to_binary(lists:duplicate(N,Subject)) + catch + _:_ -> + erlang:error(badarg) + end. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% encode_unsigned +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +encode_unsigned(Unsigned) -> + encode_unsigned(Unsigned,big). +encode_unsigned(Unsigned,Endian) -> + try + true = is_integer(Unsigned) and (Unsigned >= 0), + if + Unsigned =:= 0 -> + <<0>>; + true -> + case Endian of + big -> + list_to_binary(do_encode(Unsigned,[])); + little -> + list_to_binary(do_encode_r(Unsigned)) + end + end + catch + _:_ -> + erlang:error(badarg) + end. + +do_encode(0,L) -> + L; +do_encode(N,L) -> + Byte = N band 255, + NewN = N bsr 8, + do_encode(NewN,[Byte|L]). + +do_encode_r(0) -> + []; +do_encode_r(N) -> + Byte = N band 255, + NewN = N bsr 8, + [Byte|do_encode_r(NewN)]. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% decode_unsigned +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +decode_unsigned(Subject) -> + decode_unsigned(Subject,big). + +decode_unsigned(Subject,Endian) -> + try + true = is_binary(Subject) and (byte_size(Subject) > 0), + case Endian of + big -> + do_decode(Subject,0); + little -> + do_decode_r(Subject,0) + end + catch + _:_ -> + erlang:error(badarg) + end. + +do_decode(<<>>,N) -> + N; +do_decode(<>,N) -> + do_decode(Bin,(N bsl 8) bor X). + +do_decode_r(<<>>,N) -> + N; +do_decode_r(Bin,N) -> + Sz = byte_size(Bin) - 1, + <> = Bin, + do_decode_r(NewBin, (N bsl 8) bor X). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% referenced_byte_size cannot +%% be implemented in pure +%% erlang +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +referenced_byte_size(Bin) when is_binary(Bin) -> + erlang:error(not_implemented); +referenced_byte_size(_) -> + erlang:error(badarg). + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% Simple helper functions +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%% Option "parsing" +get_opts_match([],Part) -> + Part; +get_opts_match([{scope,{A,B}} | T],_Part) -> + get_opts_match(T,{A,B}); +get_opts_match(_,_) -> + throw(badopt). + +get_opts_split([],{Part,Global,Trim}) -> + {Part,Global,Trim}; +get_opts_split([{scope,{A,B}} | T],{_Part,Global,Trim}) -> + get_opts_split(T,{{A,B},Global,Trim}); +get_opts_split([global | T],{Part,_Global,Trim}) -> + get_opts_split(T,{Part,true,Trim}); +get_opts_split([trim | T],{Part,Global,_Trim}) -> + get_opts_split(T,{Part,Global,true}); +get_opts_split(_,_) -> + throw(badopt). + +get_opts_replace([],{Part,Global,Insert}) -> + {Part,Global,Insert}; +get_opts_replace([{scope,{A,B}} | T],{_Part,Global,Insert}) -> + get_opts_replace(T,{{A,B},Global,Insert}); +get_opts_replace([global | T],{Part,_Global,Insert}) -> + get_opts_replace(T,{Part,true,Insert}); +get_opts_replace([{insert_replaced,N} | T],{Part,Global,_Insert}) -> + get_opts_replace(T,{Part,Global,N}); +get_opts_replace(_,_) -> + throw(badopt). -- cgit v1.2.3