aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPatrik Nyblom <pan@arwen.du.uab.ericsson.se>2009-11-02 15:55:20 +0100
committerBjörn Gustavsson <bjorn@erlang.org>2010-05-17 15:51:49 +0200
commite0c4c2867c20368c5b5d88cbbf92da7b7a3f386e (patch)
tree91081527b0b2c88f595af3ea067a08fdc6df6861
parentfdc8980231b1e791ec4b8f8f3d61a7ba7dda539b (diff)
downloadotp-e0c4c2867c20368c5b5d88cbbf92da7b7a3f386e.tar.gz
otp-e0c4c2867c20368c5b5d88cbbf92da7b7a3f386e.tar.bz2
otp-e0c4c2867c20368c5b5d88cbbf92da7b7a3f386e.zip
Teach binary.c the semantics to take longest instead of shortest match
Add testcase embryos and reference implementation. Change name of compile function according to EEP31.
-rw-r--r--erts/emulator/beam/bif.tab2
-rw-r--r--erts/emulator/beam/binary.c411
-rw-r--r--lib/stdlib/src/binary.erl4
-rw-r--r--lib/stdlib/test/binary_module_SUITE.erl96
-rw-r--r--lib/stdlib/test/binref.erl456
5 files changed, 779 insertions, 190 deletions
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab
index 8d81aa7eba..3f51e6dc45 100644
--- a/erts/emulator/beam/bif.tab
+++ b/erts/emulator/beam/bif.tab
@@ -760,7 +760,7 @@ bif erlang:finish_after_on_load/2
#
# The searching/splitting/substituting thingies
#
-bif binary:match_compile/1
+bif binary:compile_pattern/1
bif binary:match/2
bif binary:match/3
bif binary:matches/2
diff --git a/erts/emulator/beam/binary.c b/erts/emulator/beam/binary.c
index 29c1af2114..4662e60d51 100644
--- a/erts/emulator/beam/binary.c
+++ b/erts/emulator/beam/binary.c
@@ -802,13 +802,12 @@ static void cleanup_my_data(Binary *bp)
* Initiate a (allocated) micro allocator and fill in the base
* for an Aho-Corasick search trie, given the accumulated length of the search strings.
*/
-static ACTrie *create_acdata(MyAllocator *my, Process *p, Uint len,
- ACNode ***qbuff /* out */,Eterm *the_bin /* out */)
+static ACTrie *create_acdata(MyAllocator *my, Uint len,
+ ACNode ***qbuff /* out */,Binary **the_bin /* out */)
{
Uint datasize = AC_SIZE(len);
ACTrie *act;
ACNode *acn;
- Eterm *hp;
Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data);
byte *data = ERTS_MAGIC_BIN_DATA(mb);
@@ -826,19 +825,17 @@ static ACTrie *create_acdata(MyAllocator *my, Process *p, Uint len,
acn->id = 0;
#endif
*qbuff = erts_alloc(ERTS_ALC_T_TMP, sizeof(ACNode *) * len);
- hp = HAlloc(p, PROC_BIN_SIZE);
- *the_bin = erts_mk_magic_binary_term(&hp, &MSO(p), mb);
+ *the_bin = mb;
return act;
}
/*
* The same initialization of allocator and basic data for Boyer-More.
*/
-static BMData *create_bmdata(MyAllocator *my, Process *p, byte *x, Uint len, Eterm *the_bin /* out */)
+static BMData *create_bmdata(MyAllocator *my, byte *x, Uint len, Binary **the_bin /* out */)
{
Uint datasize = BM_SIZE(len);
BMData *bmd;
- Eterm *hp;
Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data);
byte *data = ERTS_MAGIC_BIN_DATA(mb);
init_my_allocator(my, datasize, data);
@@ -848,8 +845,7 @@ static BMData *create_bmdata(MyAllocator *my, Process *p, byte *x, Uint len, Ete
bmd->len = len;
bmd->goodshift = my_alloc(my,sizeof(Uint) * len);
bmd->ret_tuple = 0;
- hp = HAlloc(p, PROC_BIN_SIZE);
- *the_bin = erts_mk_magic_binary_term(&hp, &MSO(p), mb);
+ *the_bin = mb;
return bmd;
}
@@ -998,7 +994,8 @@ static Uint ac_find_first_match(ACTrie *act, byte *haystack, Uint len,
while (r->final < 0)
r = r->h;
rstart = i - r->d;
- if (candidate == NULL || rstart < candidate_start) {
+ if (candidate == NULL || rstart < candidate_start ||
+ (rstart == candidate_start && candidate->d < q->d)) {
candidate_start = rstart;
candidate = r;
}
@@ -1063,12 +1060,21 @@ static Uint ac_find_all_non_overlapping(ACTrie *act, byte *haystack, Uint len,
#endif
rstart = i - r->d;
save_m = m;
- while (m > 0 && out[m-1].pos > rstart) {
+ while (m > 0 && (out[m-1].pos > rstart ||
+ (out[m-1].pos == rstart &&
+ out[m-1].len < r->d))) {
#ifdef HARDDEBUG
erts_printf("Popping %u\n",(unsigned) out[m-1].id);
#endif
--m;
}
+#ifdef HARDDEBUG
+ if (m > 0) {
+ erts_printf("Pos %u\n",out[m-1].pos);
+ erts_printf("Len %u\n",out[m-1].len);
+ }
+ erts_printf("Rstart %u\n",rstart);
+#endif
if (m == 0 || out[m-1].pos + out[m-1].len <= rstart) {
if (m >= allocated) {
if (!allocated) {
@@ -1213,7 +1219,8 @@ static Sint bm_find_first_match(BMData *bmd, byte *haystack, Uint len)
/*
* Search functionality interfaces
*/
-BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1)
+
+static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp)
{
Eterm t, b, comp_term = NIL;
Uint characters;
@@ -1223,9 +1230,9 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1)
characters = 0;
words = 0;
- if (is_list(BIF_ARG_1)) {
+ if (is_list(argument)) {
return_tuple = 1;
- t = BIF_ARG_1;
+ t = argument;
while (is_list(t)) {
b = CAR(list_val(t));
t = CDR(list_val(t));
@@ -1242,17 +1249,17 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1)
goto badarg;
}
if (words > 1) {
- comp_term = BIF_ARG_1;
+ comp_term = argument;
} else {
- comp_term = CAR(list_val(BIF_ARG_1));
+ comp_term = CAR(list_val(argument));
}
- } else if (is_binary(BIF_ARG_1)) {
- if (binary_bitsize(BIF_ARG_1) != 0) {
+ } else if (is_binary(argument)) {
+ if (binary_bitsize(argument) != 0) {
goto badarg;
}
words = 1;
- comp_term = BIF_ARG_1;
- characters = binary_size(BIF_ARG_1);
+ comp_term = argument;
+ characters = binary_size(argument);
}
if (characters == 0) {
@@ -1261,35 +1268,33 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1)
ASSERT(words > 0);
if (words == 1) {
- Eterm ret;
byte *bytes;
Uint bitoffs, bitsize;
byte *temp_alloc = NULL;
MyAllocator my;
BMData *bmd;
- Eterm *hp;
+ Binary *bin;
ERTS_GET_BINARY_BYTES(comp_term, bytes, bitoffs, bitsize);
if (bitoffs != 0) {
bytes = erts_get_aligned_binary_bytes(comp_term, &temp_alloc);
}
- bmd = create_bmdata(&my, BIF_P, bytes, characters, &ret);
+ bmd = create_bmdata(&my, bytes, characters, &bin);
bmd->ret_tuple = return_tuple;
compute_badshifts(bmd);
compute_goodshifts(bmd);
erts_free_aligned_binary_bytes(temp_alloc);
CHECK_ALLOCATOR(my);
- hp = HAlloc(BIF_P,3);
- ret = TUPLE2(hp, am_bm, ret);
- BIF_RET(ret);
+ *tag = am_bm;
+ *binp = bin;
+ return 0;
} else {
- Eterm ret;
ACTrie *act;
MyAllocator my;
- Eterm *hp;
ACNode **qbuff;
+ Binary *bin;
- act = create_acdata(&my, BIF_P, characters, &qbuff, &ret);
+ act = create_acdata(&my, characters, &qbuff, &bin);
t = comp_term;
while (is_list(t)) {
byte *bytes;
@@ -1307,18 +1312,37 @@ BIF_RETTYPE binary_match_compile_1(BIF_ALIST_1)
ac_compute_failure_functions(act,qbuff);
CHECK_ALLOCATOR(my);
erts_free(ERTS_ALC_T_TMP,qbuff);
- hp = HAlloc(BIF_P,3);
- ret = TUPLE2(hp, am_ac, ret);
- BIF_RET(ret);
+ *tag = am_ac;
+ *binp = bin;
+ return 0;
}
badarg:
- BIF_ERROR(BIF_P,BADARG);
+ return -1;
+}
+
+BIF_RETTYPE binary_compile_pattern_1(BIF_ALIST_1)
+{
+ Binary *bin;
+ Eterm tag, ret;
+ Eterm *hp;
+
+ if (do_binary_match_compile(BIF_ARG_1,&tag,&bin)) {
+ BIF_ERROR(BIF_P,BADARG);
+ }
+ hp = HAlloc(BIF_P, PROC_BIN_SIZE+3);
+ ret = erts_mk_magic_binary_term(&hp, &MSO(BIF_P), bin);
+ ret = TUPLE2(hp, tag, ret);
+ BIF_RET(ret);
}
+
BIF_RETTYPE binary_match_3(BIF_ALIST_3)
{
Uint hsstart, hslen;
Eterm *tp;
+ Eterm type;
+ Binary *bin;
+ Eterm bin_term = NIL;
if (is_not_binary(BIF_ARG_1)) {
goto badarg;
}
@@ -1355,85 +1379,90 @@ BIF_RETTYPE binary_match_3(BIF_ALIST_3)
if (arityval(*tp) != 2 || is_not_atom(tp[1])) {
goto badarg;
}
- if (tp[1] == am_bm && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) {
- Binary *mbp;
- BMData *bm;
- Sint pos;
- byte *bytes;
- Uint bitoffs, bitsize;
- byte *temp_alloc = NULL;
- Eterm ret;
- Eterm *hp;
- mbp = ((ProcBin *) binary_val(tp[2]))->val;
- if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) {
- goto badarg;
- }
- bm = (BMData *) ERTS_MAGIC_BIN_DATA(mbp);
+ if (((tp[1] != am_bm) && (tp[1] != am_ac)) ||
+ !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) {
+ goto badarg;
+ }
+ type = tp[1];
+ bin = ((ProcBin *) binary_val(tp[2]))->val;
+ if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) {
+ goto badarg;
+ }
+ bin_term = tp[2];
+ } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) {
+ goto badarg;
+ }
+
+ if (type == am_bm) {
+ BMData *bm;
+ Sint pos;
+ byte *bytes;
+ Uint bitoffs, bitsize;
+ byte *temp_alloc = NULL;
+ Eterm ret;
+ Eterm *hp;
+ bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin);
#ifdef HARDDEBUG
- dump_bm_data(bm);
+ dump_bm_data(bm);
#endif
- ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize);
- if (bitsize != 0) {
- goto badarg;
- }
- if (bitoffs != 0) {
- bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc);
- }
- pos = bm_find_first_match(bm, bytes + hsstart, hslen);
- if (pos < 0) {
- ret = am_nomatch;
- } else {
- Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P);
- ret = erts_make_integer(pos+hsstart,BIF_P);
- if (bm->ret_tuple) {
- hp = HAlloc(BIF_P,3);
- ret = TUPLE2(hp, ret, erlen);
- }
+ ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize);
+ if (bitsize != 0) {
+ goto badarg;
+ }
+ if (bitoffs != 0) {
+ bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc);
+ }
+ pos = bm_find_first_match(bm, bytes + hsstart, hslen);
+ if (pos < 0) {
+ ret = am_nomatch;
+ } else {
+ Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P);
+ ret = erts_make_integer(pos+hsstart,BIF_P);
+ if (bm->ret_tuple) {
+ hp = HAlloc(BIF_P,3);
+ ret = TUPLE2(hp, ret, erlen);
}
- erts_free_aligned_binary_bytes(temp_alloc);
- BIF_RET(ret);
- } else if (tp[1] == am_ac && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) {
- Binary *mbp;
- ACTrie *act;
- Uint pos, msn,rlen;
- byte *bytes;
- Uint bitoffs, bitsize;
- byte *temp_alloc = NULL;
- Eterm ret;
- Eterm *hp;
+ }
+ erts_free_aligned_binary_bytes(temp_alloc);
+ if (bin_term == NIL) {
+ erts_bin_free(bin);
+ }
+ BIF_RET(ret);
+ } else if (type == am_ac) {
+ ACTrie *act;
+ Uint pos, msn,rlen;
+ byte *bytes;
+ Uint bitoffs, bitsize;
+ byte *temp_alloc = NULL;
+ Eterm ret;
+ Eterm *hp;
- mbp = ((ProcBin *) binary_val(tp[2]))->val;
- if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) {
- goto badarg;
- }
- act = (ACTrie *) ERTS_MAGIC_BIN_DATA(mbp);
+ act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin);
#ifdef HARDDEBUG
- dump_ac_trie(act);
+ dump_ac_trie(act);
#endif
- ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize);
- if (bitsize != 0) {
- goto badarg;
- }
- if (bitoffs != 0) {
- bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc);
- }
- msn = ac_find_first_match(act, bytes + hsstart,
- hslen, &pos, &rlen);
- if (msn == 0) {
- ret = am_nomatch;
- } else {
- Eterm epos = erts_make_integer(pos+hsstart,BIF_P);
- Eterm erlen = erts_make_integer(rlen,BIF_P);
- hp = HAlloc(BIF_P,3);
- ret = TUPLE2(hp, epos, erlen);
- }
- erts_free_aligned_binary_bytes(temp_alloc);
- BIF_RET(ret);
- } else {
+ ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize);
+ if (bitsize != 0) {
goto badarg;
}
- } else {
- goto badarg; /* Compilation on the fly NYI */
+ if (bitoffs != 0) {
+ bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc);
+ }
+ msn = ac_find_first_match(act, bytes + hsstart,
+ hslen, &pos, &rlen);
+ if (msn == 0) {
+ ret = am_nomatch;
+ } else {
+ Eterm epos = erts_make_integer(pos+hsstart,BIF_P);
+ Eterm erlen = erts_make_integer(rlen,BIF_P);
+ hp = HAlloc(BIF_P,3);
+ ret = TUPLE2(hp, epos, erlen);
+ }
+ erts_free_aligned_binary_bytes(temp_alloc);
+ if (bin_term == NIL) {
+ erts_bin_free(bin);
+ }
+ BIF_RET(ret);
}
badarg:
BIF_ERROR(BIF_P,BADARG);
@@ -1447,6 +1476,9 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3)
{
Uint hsstart, hslen;
Eterm *tp;
+ Eterm type;
+ Binary *bin;
+ Eterm bin_term = NIL;
if (is_not_binary(BIF_ARG_1)) {
goto badarg;
}
@@ -1483,98 +1515,103 @@ BIF_RETTYPE binary_matches_3(BIF_ALIST_3)
if (arityval(*tp) != 2 || is_not_atom(tp[1])) {
goto badarg;
}
- if (tp[1] == am_bm && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) {
- Binary *mbp;
- BMData *bm;
- Sint pos;
- byte *bytes;
- Uint bitoffs, bitsize;
- byte *temp_alloc = NULL;
- Eterm ret;
- Eterm *hp;
- mbp = ((ProcBin *) binary_val(tp[2]))->val;
- if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) {
- goto badarg;
- }
- bm = (BMData *) ERTS_MAGIC_BIN_DATA(mbp);
+ if (((tp[1] != am_bm) && (tp[1] != am_ac)) ||
+ !ERTS_TERM_IS_MAGIC_BINARY(tp[2])) {
+ goto badarg;
+ }
+ type = tp[1];
+ bin = ((ProcBin *) binary_val(tp[2]))->val;
+ if (ERTS_MAGIC_BIN_DESTRUCTOR(bin) != cleanup_my_data) {
+ goto badarg;
+ }
+ bin_term = tp[2];
+ } else if (do_binary_match_compile(BIF_ARG_2,&type,&bin)) {
+ goto badarg;
+ }
+
+ if (type == am_bm) {
+ BMData *bm;
+ Sint pos;
+ byte *bytes;
+ Uint bitoffs, bitsize;
+ byte *temp_alloc = NULL;
+ Eterm ret;
+ Eterm *hp;
+ bm = (BMData *) ERTS_MAGIC_BIN_DATA(bin);
#ifdef HARDDEBUG
- dump_bm_data(bm);
+ dump_bm_data(bm);
#endif
- ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize);
- if (bitsize != 0) {
- goto badarg;
- }
- if (bitoffs != 0) {
- bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc);
- }
- pos = bm_find_first_match(bm, bytes + hsstart, hslen);
- if (pos < 0) {
- ret = am_nomatch;
- } else {
- Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P);
- ret = erts_make_integer(pos,BIF_P);
- if (bm->ret_tuple) {
- hp = HAlloc(BIF_P,3);
- ret = TUPLE2(hp, ret, erlen);
- }
+ ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize);
+ if (bitsize != 0) {
+ goto badarg;
+ }
+ if (bitoffs != 0) {
+ bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc);
+ }
+ pos = bm_find_first_match(bm, bytes + hsstart, hslen);
+ if (pos < 0) {
+ ret = am_nomatch;
+ } else {
+ Eterm erlen = erts_make_integer((Uint) bm->len, BIF_P);
+ ret = erts_make_integer(pos,BIF_P);
+ if (bm->ret_tuple) {
+ hp = HAlloc(BIF_P,3);
+ ret = TUPLE2(hp, ret, erlen);
}
- erts_free_aligned_binary_bytes(temp_alloc);
- BIF_RET(ret);
- } else if (tp[1] == am_ac && ERTS_TERM_IS_MAGIC_BINARY(tp[2])) {
- Binary *mbp;
- ACTrie *act;
- Uint rlen;
- Sint i;
- FindallData *fad;
- byte *bytes;
- Uint bitoffs, bitsize;
- byte *temp_alloc = NULL;
- Eterm ret,tpl;
- Eterm *hp;
+ }
+ erts_free_aligned_binary_bytes(temp_alloc);
+ if (bin_term == NIL) {
+ erts_bin_free(bin);
+ }
+ BIF_RET(ret);
+ } else if (type == am_ac) {
+ ACTrie *act;
+ Uint rlen;
+ Sint i;
+ FindallData *fad;
+ byte *bytes;
+ Uint bitoffs, bitsize;
+ byte *temp_alloc = NULL;
+ Eterm ret,tpl;
+ Eterm *hp;
- mbp = ((ProcBin *) binary_val(tp[2]))->val;
- if (ERTS_MAGIC_BIN_DESTRUCTOR(mbp) != cleanup_my_data) {
- goto badarg;
- }
- act = (ACTrie *) ERTS_MAGIC_BIN_DATA(mbp);
+ act = (ACTrie *) ERTS_MAGIC_BIN_DATA(bin);
#ifdef HARDDEBUG
- dump_ac_trie(act);
+ dump_ac_trie(act);
#endif
- ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize);
- if (bitsize != 0) {
- goto badarg;
- }
- if (bitoffs != 0) {
- bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc);
- }
- rlen = ac_find_all_non_overlapping(act, bytes + hsstart,
- hslen, &fad);
- if (rlen == 0) {
- ret = am_nomatch;
- } else {
- for (i = 0; i < rlen; ++i) {
- fad[i].epos = erts_make_integer(fad[i].pos,BIF_P);
- fad[i].elen = erts_make_integer(fad[i].len,BIF_P);
- }
- hp = HAlloc(BIF_P,rlen * (3 + 2));
- ret = NIL;
- for (i = rlen - 1; i >= 0; --i) {
- tpl = TUPLE2(hp, fad[i].epos, fad[i].elen);
- hp +=3;
- ret = CONS(hp,tpl,ret);
- hp += 2;
- }
+ ERTS_GET_BINARY_BYTES(BIF_ARG_1, bytes, bitoffs, bitsize);
+ if (bitsize != 0) {
+ goto badarg;
+ }
+ if (bitoffs != 0) {
+ bytes = erts_get_aligned_binary_bytes(BIF_ARG_1, &temp_alloc);
+ }
+ rlen = ac_find_all_non_overlapping(act, bytes + hsstart,
+ hslen, &fad);
+ if (rlen == 0) {
+ ret = am_nomatch;
+ } else {
+ for (i = 0; i < rlen; ++i) {
+ fad[i].epos = erts_make_integer(fad[i].pos,BIF_P);
+ fad[i].elen = erts_make_integer(fad[i].len,BIF_P);
}
- erts_free_aligned_binary_bytes(temp_alloc);
- if (fad != NULL) {
- erts_free(ERTS_ALC_T_TMP,fad);
+ hp = HAlloc(BIF_P,rlen * (3 + 2));
+ ret = NIL;
+ for (i = rlen - 1; i >= 0; --i) {
+ tpl = TUPLE2(hp, fad[i].epos, fad[i].elen);
+ hp +=3;
+ ret = CONS(hp,tpl,ret);
+ hp += 2;
}
- BIF_RET(ret);
- } else {
- goto badarg;
}
- } else {
- goto badarg; /* Compilation on the fly NYI */
+ erts_free_aligned_binary_bytes(temp_alloc);
+ if (fad != NULL) {
+ erts_free(ERTS_ALC_T_TMP,fad);
+ }
+ if (bin_term == NIL) {
+ erts_bin_free(bin);
+ }
+ BIF_RET(ret);
}
badarg:
BIF_ERROR(BIF_P,BADARG);
diff --git a/lib/stdlib/src/binary.erl b/lib/stdlib/src/binary.erl
index f2774f8c84..d649f974bb 100644
--- a/lib/stdlib/src/binary.erl
+++ b/lib/stdlib/src/binary.erl
@@ -19,8 +19,8 @@
-module(binary).
%%
%% The following functions implemented as BIF's
-%% binary:match_compile/1
-%% binary:match/3
+%% binary:compile_pattern/1
+%% binary:find/3
%% XXX:PaN more to come...
-export([first/1,first/2,last/1,last/2,nth/2,extract/3]).
diff --git a/lib/stdlib/test/binary_module_SUITE.erl b/lib/stdlib/test/binary_module_SUITE.erl
new file mode 100644
index 0000000000..31d0812cfc
--- /dev/null
+++ b/lib/stdlib/test/binary_module_SUITE.erl
@@ -0,0 +1,96 @@
+-module(binary_module_SUITE).
+
+-export([all/1, interesting/1]).
+
+-define(STANDALONE,1).
+
+-ifdef(STANDALONE).
+
+-define(line,erlang:display({?MODULE,?LINE}),).
+
+-else.
+
+-include("test_server.hrl").
+
+-endif.
+
+
+
+-ifdef(STANDALONE).
+-export([run/0]).
+
+run() ->
+ [ apply(?MODULE,X,[[]]) || X <- all(suite) ].
+
+-endif.
+
+all(suite) -> [interesting].
+
+
+interesting(doc) ->
+ ["Try some interesting patterns"];
+interesting(Config) when is_list(Config) ->
+ X = do_interesting(binary),
+ X = do_interesting(binref).
+
+do_interesting(Module) ->
+ ?line {0,4} = Module:match(<<"123456">>,
+ Module:compile_pattern([<<"12">>,<<"1234">>,
+ <<"23">>,<<"3">>,
+ <<"34">>,<<"456">>,
+ <<"45">>,<<"6">>])),
+ ?line [{0,4},{5,1}] = Module:matches(<<"123456">>,
+ Module:compile_pattern([<<"12">>,<<"1234">>,
+ <<"23">>,<<"3">>,
+ <<"34">>,<<"456">>,
+ <<"45">>,<<"6">>])),
+ ?line [{0,4}] = Module:matches(<<"123456">>,
+ Module:compile_pattern([<<"12">>,<<"1234">>,
+ <<"23">>,<<"3">>,
+ <<"34">>,<<"456">>,
+ <<"45">>])),
+ ?line [{0,2},{2,2}] = Module:matches(<<"123456">>,
+ Module:compile_pattern([<<"12">>,
+ <<"23">>,<<"3">>,
+ <<"34">>,<<"456">>,
+ <<"45">>])),
+ ?line {1,4} = Module:match(<<"123456">>,
+ Module:compile_pattern([<<"34">>,<<"34">>,
+ <<"12347">>,<<"2345">>])),
+ ?line [{1,4}] = Module:matches(<<"123456">>,
+ Module:compile_pattern([<<"34">>,<<"34">>,
+ <<"12347">>,<<"2345">>])),
+ ?line [{2,2}] = Module:matches(<<"123456">>,
+ Module:compile_pattern([<<"34">>,<<"34">>,
+ <<"12347">>,<<"2346">>])),
+
+ ?line {0,4} = Module:match(<<"123456">>,
+ [<<"12">>,<<"1234">>,
+ <<"23">>,<<"3">>,
+ <<"34">>,<<"456">>,
+ <<"45">>,<<"6">>]),
+ ?line [{0,4},{5,1}] = Module:matches(<<"123456">>,
+ [<<"12">>,<<"1234">>,
+ <<"23">>,<<"3">>,
+ <<"34">>,<<"456">>,
+ <<"45">>,<<"6">>]),
+ ?line [{0,4}] = Module:matches(<<"123456">>,
+ [<<"12">>,<<"1234">>,
+ <<"23">>,<<"3">>,
+ <<"34">>,<<"456">>,
+ <<"45">>]),
+ ?line [{0,2},{2,2}] = Module:matches(<<"123456">>,
+ [<<"12">>,
+ <<"23">>,<<"3">>,
+ <<"34">>,<<"456">>,
+ <<"45">>]),
+ ?line {1,4} = Module:match(<<"123456">>,
+ [<<"34">>,<<"34">>,
+ <<"12347">>,<<"2345">>]),
+ ?line [{1,4}] = Module:matches(<<"123456">>,
+ [<<"34">>,<<"34">>,
+ <<"12347">>,<<"2345">>]),
+ ?line [{2,2}] = Module:matches(<<"123456">>,
+ [<<"34">>,<<"34">>,
+ <<"12347">>,<<"2346">>]),
+ ok.
diff --git a/lib/stdlib/test/binref.erl b/lib/stdlib/test/binref.erl
new file mode 100644
index 0000000000..270796c8f9
--- /dev/null
+++ b/lib/stdlib/test/binref.erl
@@ -0,0 +1,456 @@
+-module(binref).
+
+-export([compile_pattern/1,match/2,match/3,matches/2,matches/3,
+ split/2,split/3,replace/3,replace/4,first/1,last/1,at/2,
+ part/2,part/3,copy/1,copy/2,encode_unsigned/1,encode_unsigned/2,
+ decode_unsigned/1,decode_unsigned/2,referenced_byte_size/1]).
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% compile_pattern, a dummy
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+compile_pattern(Pattern) when is_binary(Pattern) ->
+ {[Pattern]};
+compile_pattern(Pattern) ->
+ try
+ [ true = is_binary(P) || P <- Pattern ],
+ {Pattern}
+ catch
+ _:_ ->
+ erlang:error(badarg)
+ end.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% match and matches
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+match(H,N) ->
+ match(H,N,[]).
+match(Haystack,Needle,Options) when is_binary(Needle) ->
+ match(Haystack,[Needle],Options);
+match(Haystack,{Needles},Options) ->
+ match(Haystack,Needles,Options);
+match(Haystack,Needles,Options) ->
+ try
+ true = is_binary(Haystack) and is_list(Needles), % badarg, not function_clause
+ case get_opts_match(Options,no) of
+ no ->
+ mloop(Haystack,Needles);
+ {A,B} when B > 0 ->
+ <<_:A/binary,SubStack:B/binary,_/binary>> = Haystack,
+ mloop(SubStack,Needles,A,B+A);
+ {A,B} when B < 0 ->
+ Start = A + B,
+ Len = -B,
+ <<_:Start/binary,SubStack:Len/binary,_/binary>> = Haystack,
+ mloop(SubStack,Needles,Start,Len+Start);
+ _ ->
+ no
+ end
+ catch
+ _:_ ->
+ erlang:error(badarg)
+ end.
+matches(H,N) ->
+ matches(H,N,[]).
+matches(Haystack,Needle,Options) when is_binary(Needle) ->
+ matches(Haystack,[Needle],Options);
+matches(Haystack,{Needles},Options) ->
+ matches(Haystack,Needles,Options);
+matches(Haystack,Needles,Options) ->
+ try
+ true = is_binary(Haystack) and is_list(Needles), % badarg, not function_clause
+ case get_opts_match(Options,no) of
+ no ->
+ msloop(Haystack,Needles);
+ {A,B} when B > 0 ->
+ <<_:A/binary,SubStack:B/binary,_/binary>> = Haystack,
+ msloop(SubStack,Needles,A,B+A);
+ {A,B} when B < 0 ->
+ Start = A + B,
+ Len = -B,
+ <<_:Start/binary,SubStack:Len/binary,_/binary>> = Haystack,
+ msloop(SubStack,Needles,Start,Len+Start);
+ _ ->
+ []
+ end
+ catch
+ _:_ ->
+ erlang:error(badarg)
+ end.
+
+mloop(Haystack,Needles) ->
+ mloop(Haystack,Needles,0,byte_size(Haystack)).
+
+mloop(_Haystack,_Needles,N,M) when N >= M ->
+ no;
+mloop(Haystack,Needles,N,M) ->
+ case mloop2(Haystack,Needles,N,no) of
+ no ->
+ % Not found
+ <<_:8,NewStack/binary>> = Haystack,
+ mloop(NewStack,Needles,N+1,M);
+ {N,Len} ->
+ {N,Len}
+ end.
+
+msloop(Haystack,Needles) ->
+ msloop(Haystack,Needles,0,byte_size(Haystack)).
+
+msloop(_Haystack,_Needles,N,M) when N >= M ->
+ [];
+msloop(Haystack,Needles,N,M) ->
+ case mloop2(Haystack,Needles,N,no) of
+ no ->
+ % Not found
+ <<_:8,NewStack/binary>> = Haystack,
+ msloop(NewStack,Needles,N+1,M);
+ {N,Len} ->
+ NewN = N+Len,
+ if
+ NewN >= M ->
+ [{N,Len}];
+ true ->
+ <<_:Len/binary,NewStack/binary>> = Haystack,
+ [{N,Len} | msloop(NewStack,Needles,NewN,M)]
+ end
+ end.
+
+mloop2(_Haystack,[],_N,Res) ->
+ Res;
+mloop2(Haystack,[Needle|Tail],N,Candidate) ->
+ NS = byte_size(Needle),
+ case Haystack of
+ <<Needle:NS/binary,_/binary>> ->
+ NewCandidate = case Candidate of
+ no ->
+ {N,NS};
+ {N,ONS} when ONS < NS ->
+ {N,NS};
+ Better ->
+ Better
+ end,
+ mloop2(Haystack,Tail,N,NewCandidate);
+ _ ->
+ mloop2(Haystack,Tail,N,Candidate)
+ end.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% split
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+split(H,N) ->
+ split(H,N,[]).
+split(Haystack,{Needles},Options) ->
+ split(Haystack, Needles, Options);
+split(Haystack,Needles,Options) ->
+ try
+ {Part,Global,Trim} = get_opts_split(Options,{no,false,false}),
+ {Start,End,NewStack} =
+ case Part of
+ no ->
+ {0,byte_size(Haystack),Haystack};
+ {A,B} when B >= 0 ->
+ <<_:A/binary,SubStack:B/binary,_/binary>> = Haystack,
+ {A,A+B,SubStack};
+ {A,B} when B < 0 ->
+ S = A + B,
+ L = -B,
+ <<_:S/binary,SubStack:L/binary,_/binary>> = Haystack,
+ {S,S+L,SubStack}
+ end,
+ MList = if
+ Global ->
+ msloop(NewStack,Needles,Start,End);
+ true ->
+ case mloop(NewStack,Needles,Start,End) of
+ no ->
+ [];
+ X ->
+ [X]
+ end
+ end,
+ do_split(Haystack,MList,0,Trim)
+ catch
+ _:_ ->
+ erlang:error(badarg)
+ end.
+
+do_split(H,[],N,true) when N >= byte_size(H) ->
+ [];
+do_split(H,[],N,_) ->
+ [part(H,{N,byte_size(H)-N})];
+do_split(H,[{A,B}|T],N,Trim) ->
+ case part(H,{N,A-N}) of
+ <<>> ->
+ Rest = do_split(H,T,A+B,Trim),
+ case {Trim, Rest} of
+ {true,[]} ->
+ [];
+ _ ->
+ [<<>> | Rest]
+ end;
+ Oth ->
+ [Oth | do_split(H,T,A+B,Trim)]
+ end.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% replace
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+replace(H,N,R) ->
+ replace(H,N,R,[]).
+replace(Haystack,{Needles},Replacement,Options) ->
+ replace(Haystack,Needles,Replacement,Options);
+
+replace(Haystack,Needles,Replacement,Options) ->
+ try
+ true = is_binary(Replacement), % Make badarg instead of function clause
+ {Part,Global,Insert} = get_opts_replace(Options,{no,false,[]}),
+ {Start,End,NewStack} =
+ case Part of
+ no ->
+ {0,byte_size(Haystack),Haystack};
+ {A,B} when B >= 0 ->
+ <<_:A/binary,SubStack:B/binary,_/binary>> = Haystack,
+ {A,A+B,SubStack};
+ {A,B} when B < 0 ->
+ S = A + B,
+ L = -B,
+ <<_:S/binary,SubStack:L/binary,_/binary>> = Haystack,
+ {S,S+L,SubStack}
+ end,
+ MList = if
+ Global ->
+ msloop(NewStack,Needles,Start,End);
+ true ->
+ case mloop(NewStack,Needles,Start,End) of
+ no ->
+ [];
+ X ->
+ [X]
+ end
+ end,
+ ReplList = case Insert of
+ [] ->
+ Replacement;
+ Y when is_integer(Y) ->
+ splitat(Replacement,0,[Y]);
+ Li when is_list(Li) ->
+ splitat(Replacement,0,lists:sort(Li))
+ end,
+ erlang:iolist_to_binary(do_replace(Haystack,MList,ReplList,0))
+ catch
+ _:_ ->
+ erlang:error(badarg)
+ end.
+
+
+do_replace(H,[],_,N) ->
+ [part(H,{N,byte_size(H)-N})];
+do_replace(H,[{A,B}|T],Replacement,N) ->
+ [part(H,{N,A-N}),
+ if
+ is_list(Replacement) ->
+ do_insert(Replacement, part(H,{A,B}));
+ true ->
+ Replacement
+ end
+ | do_replace(H,T,Replacement,A+B)].
+
+do_insert([X],_) ->
+ [X];
+do_insert([H|T],R) ->
+ [H,R|do_insert(T,R)].
+
+splitat(H,N,[]) ->
+ [part(H,{N,byte_size(H)-N})];
+splitat(H,N,[I|T]) ->
+ [part(H,{N,I-N})|splitat(H,I,T)].
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% first, last and at
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+first(Subject) ->
+ try
+ <<A:8,_/binary>> = Subject,
+ A
+ catch
+ _:_ ->
+ erlang:error(badarg)
+ end.
+
+last(Subject) ->
+ try
+ N = byte_size(Subject) - 1,
+ <<_:N/binary,A:8>> = Subject,
+ A
+ catch
+ _:_ ->
+ erlang:error(badarg)
+ end.
+
+at(Subject,X) ->
+ try
+ <<_:X/binary,A:8,_/binary>> = Subject,
+ A
+ catch
+ _:_ ->
+ erlang:error(badarg)
+ end.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% part
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+part(Subject,Part) ->
+ try
+ do_part(Subject,Part)
+ catch
+ _:_ ->
+ erlang:error(badarg)
+ end.
+
+part(Subject,Pos,Len) ->
+ part(Subject,{Pos,Len}).
+
+do_part(Bin,{A,B}) when B >= 0 ->
+ <<_:A/binary,Sub:B/binary,_/binary>> = Bin,
+ Sub;
+do_part(Bin,{A,B}) when B < 0 ->
+ S = A + B,
+ L = -B,
+ <<_:S/binary,Sub:L/binary,_/binary>> = Bin,
+ Sub.
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% copy
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+copy(Subject) ->
+ copy(Subject,1).
+copy(Subject,N) ->
+ try
+ true = is_integer(N) and (N > 0) and is_binary(Subject), % Badarg, not function clause
+ erlang:list_to_binary(lists:duplicate(N,Subject))
+ catch
+ _:_ ->
+ erlang:error(badarg)
+ end.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% encode_unsigned
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+encode_unsigned(Unsigned) ->
+ encode_unsigned(Unsigned,big).
+encode_unsigned(Unsigned,Endian) ->
+ try
+ true = is_integer(Unsigned) and (Unsigned >= 0),
+ if
+ Unsigned =:= 0 ->
+ <<0>>;
+ true ->
+ case Endian of
+ big ->
+ list_to_binary(do_encode(Unsigned,[]));
+ little ->
+ list_to_binary(do_encode_r(Unsigned))
+ end
+ end
+ catch
+ _:_ ->
+ erlang:error(badarg)
+ end.
+
+do_encode(0,L) ->
+ L;
+do_encode(N,L) ->
+ Byte = N band 255,
+ NewN = N bsr 8,
+ do_encode(NewN,[Byte|L]).
+
+do_encode_r(0) ->
+ [];
+do_encode_r(N) ->
+ Byte = N band 255,
+ NewN = N bsr 8,
+ [Byte|do_encode_r(NewN)].
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% decode_unsigned
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+decode_unsigned(Subject) ->
+ decode_unsigned(Subject,big).
+
+decode_unsigned(Subject,Endian) ->
+ try
+ true = is_binary(Subject) and (byte_size(Subject) > 0),
+ case Endian of
+ big ->
+ do_decode(Subject,0);
+ little ->
+ do_decode_r(Subject,0)
+ end
+ catch
+ _:_ ->
+ erlang:error(badarg)
+ end.
+
+do_decode(<<>>,N) ->
+ N;
+do_decode(<<X:8,Bin/binary>>,N) ->
+ do_decode(Bin,(N bsl 8) bor X).
+
+do_decode_r(<<>>,N) ->
+ N;
+do_decode_r(Bin,N) ->
+ Sz = byte_size(Bin) - 1,
+ <<NewBin:Sz/binary,X>> = Bin,
+ do_decode_r(NewBin, (N bsl 8) bor X).
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% referenced_byte_size cannot
+%% be implemented in pure
+%% erlang
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+referenced_byte_size(Bin) when is_binary(Bin) ->
+ erlang:error(not_implemented);
+referenced_byte_size(_) ->
+ erlang:error(badarg).
+
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% Simple helper functions
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%% Option "parsing"
+get_opts_match([],Part) ->
+ Part;
+get_opts_match([{scope,{A,B}} | T],_Part) ->
+ get_opts_match(T,{A,B});
+get_opts_match(_,_) ->
+ throw(badopt).
+
+get_opts_split([],{Part,Global,Trim}) ->
+ {Part,Global,Trim};
+get_opts_split([{scope,{A,B}} | T],{_Part,Global,Trim}) ->
+ get_opts_split(T,{{A,B},Global,Trim});
+get_opts_split([global | T],{Part,_Global,Trim}) ->
+ get_opts_split(T,{Part,true,Trim});
+get_opts_split([trim | T],{Part,Global,_Trim}) ->
+ get_opts_split(T,{Part,Global,true});
+get_opts_split(_,_) ->
+ throw(badopt).
+
+get_opts_replace([],{Part,Global,Insert}) ->
+ {Part,Global,Insert};
+get_opts_replace([{scope,{A,B}} | T],{_Part,Global,Insert}) ->
+ get_opts_replace(T,{{A,B},Global,Insert});
+get_opts_replace([global | T],{Part,_Global,Insert}) ->
+ get_opts_replace(T,{Part,true,Insert});
+get_opts_replace([{insert_replaced,N} | T],{Part,Global,_Insert}) ->
+ get_opts_replace(T,{Part,Global,N});
+get_opts_replace(_,_) ->
+ throw(badopt).