aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2016-08-10 10:09:56 +0200
committerBjörn Gustavsson <[email protected]>2016-08-10 10:09:56 +0200
commit73d9d1d3adabce8636a0010d6fdb6be37c36cae2 (patch)
treeec1093d5760cdeb2f03a9929f355515dd5106af2
parent3dc416bb89e577c83e16c306fd4821da03681ab9 (diff)
parenta5fcd4f26969a768950dc643eeed2fdb41a5dc41 (diff)
downloadotp-73d9d1d3adabce8636a0010d6fdb6be37c36cae2.tar.gz
otp-73d9d1d3adabce8636a0010d6fdb6be37c36cae2.tar.bz2
otp-73d9d1d3adabce8636a0010d6fdb6be37c36cae2.zip
Merge branch 'josevalim/large-binary-strings/PR-1131/OTP-13794'
* josevalim/large-binary-strings/PR-1131/OTP-13794: Move expansion of strings in binaries to v3_core
-rw-r--r--lib/compiler/src/sys_pre_expand.erl18
-rw-r--r--lib/compiler/src/v3_core.erl24
-rw-r--r--lib/compiler/test/bs_bincomp_SUITE.erl1
-rw-r--r--lib/compiler/test/bs_utf_SUITE.erl1
-rw-r--r--lib/debugger/src/dbg_iload.erl19
-rw-r--r--lib/debugger/test/bs_bincomp_SUITE.erl1
-rw-r--r--lib/stdlib/src/eval_bits.erl14
7 files changed, 53 insertions, 25 deletions
diff --git a/lib/compiler/src/sys_pre_expand.erl b/lib/compiler/src/sys_pre_expand.erl
index 7ab4e1845c..f996a2d2d7 100644
--- a/lib/compiler/src/sys_pre_expand.erl
+++ b/lib/compiler/src/sys_pre_expand.erl
@@ -520,9 +520,8 @@ new_fun_name(#expand{func=F,arity=A,fcount=I}=St, FName) ->
%% pattern_bin([Element], State) -> {[Element],[Variable],[UsedVar],State}.
-pattern_bin(Es0, St) ->
- Es1 = bin_expand_strings(Es0),
- foldr(fun (E, Acc) -> pattern_element(E, Acc) end, {[],St}, Es1).
+pattern_bin(Es, St) ->
+ foldr(fun (E, Acc) -> pattern_element(E, Acc) end, {[],St}, Es).
pattern_element({bin_element,Line,Expr0,Size0,Type0}, {Es,St0}) ->
{Expr1,St1} = pattern(Expr0, St0),
@@ -558,9 +557,8 @@ coerce_to_float(E, _) -> E.
%% expr_bin([Element], State) -> {[Element],State}.
-expr_bin(Es0, St) ->
- Es1 = bin_expand_strings(Es0),
- foldr(fun (E, Acc) -> bin_element(E, Acc) end, {[],St}, Es1).
+expr_bin(Es, St) ->
+ foldr(fun (E, Acc) -> bin_element(E, Acc) end, {[],St}, Es).
bin_element({bin_element,Line,Expr,Size,Type}, {Es,St0}) ->
{Expr1,St1} = expr(Expr, St0),
@@ -570,14 +568,6 @@ bin_element({bin_element,Line,Expr,Size,Type}, {Es,St0}) ->
{Size2,Type1} = make_bit_type(Line, Size1, Type),
{[{bin_element,Line,Expr1,Size2,Type1}|Es],St2}.
-bin_expand_strings(Es) ->
- foldr(fun ({bin_element,Line,{string,_,S},Sz,Ts}, Es1) ->
- foldr(fun (C, Es2) ->
- [{bin_element,Line,{char,Line,C},Sz,Ts}|Es2]
- end, Es1, S);
- (E, Es1) -> [E|Es1]
- end, [], Es).
-
%% new_var_name(State) -> {VarName,State}.
new_var_name(St) ->
diff --git a/lib/compiler/src/v3_core.erl b/lib/compiler/src/v3_core.erl
index d71411de80..634ec68736 100644
--- a/lib/compiler/src/v3_core.erl
+++ b/lib/compiler/src/v3_core.erl
@@ -901,7 +901,7 @@ try_after(As, St0) ->
expr_bin(Es0, Anno, St0) ->
case constant_bin(Es0) of
error ->
- {Es,Eps,St} = expr_bin_1(Es0, St0),
+ {Es,Eps,St} = expr_bin_1(bin_expand_strings(Es0), St0),
{#ibinary{anno=#a{anno=Anno},segments=Es},Eps,St};
Bin ->
{#c_literal{anno=Anno,val=Bin},[],St0}
@@ -923,7 +923,8 @@ constant_bin(Es) ->
constant_bin_1(Es) ->
verify_suitable_fields(Es),
EmptyBindings = erl_eval:new_bindings(),
- EvalFun = fun({integer,_,I}, B) -> {value,I,B};
+ EvalFun = fun({string,_,S}, B) -> {value,S,B};
+ ({integer,_,I}, B) -> {value,I,B};
({char,_,C}, B) -> {value,C,B};
({float,_,F}, B) -> {value,F,B};
({atom,_,undefined}, B) -> {value,undefined,B}
@@ -944,6 +945,9 @@ verify_suitable_fields([{bin_element,_,Val,SzTerm,Opts}|Es]) ->
end,
{unit,Unit} = keyfind(unit, 1, Opts),
case {SzTerm,Val} of
+ {{atom,_,undefined},{string,_,_}} ->
+ %% UTF-8/16/32.
+ ok;
{{atom,_,undefined},{char,_,_}} ->
%% UTF-8/16/32.
ok;
@@ -983,6 +987,14 @@ count_bits(Int) ->
count_bits_1(0, Bits) -> Bits;
count_bits_1(Int, Bits) -> count_bits_1(Int bsr 64, Bits+64).
+bin_expand_strings(Es) ->
+ foldr(fun ({bin_element,Line,{string,_,S},Sz,Ts}, Es1) ->
+ foldr(fun (C, Es2) ->
+ [{bin_element,Line,{char,Line,C},Sz,Ts}|Es2]
+ end, Es1, S);
+ (E, Es1) -> [E|Es1]
+ end, [], Es).
+
expr_bin_1(Es, St) ->
foldr(fun (E, {Ces,Esp,St0}) ->
{Ce,Ep,St1} = bitstr(E, St0),
@@ -1394,6 +1406,9 @@ bc_elem_size({bin,_,El}, St0) ->
bc_elem_size(_, _) ->
throw(impossible).
+bc_elem_size_1([{bin_element,_,{string,_,String},{integer,_,N},Flags}|Es], Bits, Vars) ->
+ {unit,U} = keyfind(unit, 1, Flags),
+ bc_elem_size_1(Es, Bits+U*N*length(String), Vars);
bc_elem_size_1([{bin_element,_,_,{integer,_,N},Flags}|Es], Bits, Vars) ->
{unit,U} = keyfind(unit, 1, Flags),
bc_elem_size_1(Es, Bits+U*N, Vars);
@@ -1513,6 +1528,9 @@ bc_list_length(_, _) ->
bc_bin_size({bin,_,Els}) ->
bc_bin_size_1(Els, 0).
+bc_bin_size_1([{bin_element,_,{string,_,String},{integer,_,Sz},Flags}|Els], N) ->
+ {unit,U} = keyfind(unit, 1, Flags),
+ bc_bin_size_1(Els, N+U*Sz*length(String));
bc_bin_size_1([{bin_element,_,_,{integer,_,Sz},Flags}|Els], N) ->
{unit,U} = keyfind(unit, 1, Flags),
bc_bin_size_1(Els, N+U*Sz);
@@ -1736,7 +1754,7 @@ pat_alias_map_pairs_1([]) -> [].
%% pat_bin([BinElement], State) -> [BinSeg].
-pat_bin(Ps, St) -> [pat_segment(P, St) || P <- Ps].
+pat_bin(Ps, St) -> [pat_segment(P, St) || P <- bin_expand_strings(Ps)].
pat_segment({bin_element,L,Val,Size,[Type,{unit,Unit}|Flags]}, St) ->
Anno = lineno_anno(L, St),
diff --git a/lib/compiler/test/bs_bincomp_SUITE.erl b/lib/compiler/test/bs_bincomp_SUITE.erl
index 4743821337..dd1d245f88 100644
--- a/lib/compiler/test/bs_bincomp_SUITE.erl
+++ b/lib/compiler/test/bs_bincomp_SUITE.erl
@@ -56,6 +56,7 @@ end_per_group(_GroupName, Config) ->
byte_aligned(Config) when is_list(Config) ->
cs_init(),
<<"abcdefg">> = cs(<< <<(X+32)>> || <<X>> <= <<"ABCDEFG">> >>),
+ <<"AxyzBxyzCxyz">> = cs(<< <<X, "xyz">> || <<X>> <= <<"ABC">> >>),
<<1:32/little,2:32/little,3:32/little,4:32/little>> =
cs(<< <<X:32/little>> || <<X:32>> <= <<1:32,2:32,3:32,4:32>> >>),
cs(<<1:32/little,2:32/little,3:32/little,4:32/little>> =
diff --git a/lib/compiler/test/bs_utf_SUITE.erl b/lib/compiler/test/bs_utf_SUITE.erl
index c894041f72..ef3fc54b37 100644
--- a/lib/compiler/test/bs_utf_SUITE.erl
+++ b/lib/compiler/test/bs_utf_SUITE.erl
@@ -235,6 +235,7 @@ utf32_to_unicode(<<>>) -> [].
literals(Config) when is_list(Config) ->
abc_utf8 = match_literal(<<"abc"/utf8>>),
abc_utf8 = match_literal(<<$a,$b,$c>>),
+ abc_utf8 = match_literal(<<$a/utf8,$b/utf8,$c/utf8>>),
abc_utf16be = match_literal(<<"abc"/utf16>>),
abc_utf16be = match_literal(<<$a:16,$b:16,$c:16>>),
diff --git a/lib/debugger/src/dbg_iload.erl b/lib/debugger/src/dbg_iload.erl
index 369b456524..f83684b605 100644
--- a/lib/debugger/src/dbg_iload.erl
+++ b/lib/debugger/src/dbg_iload.erl
@@ -216,7 +216,7 @@ pattern({op,_,'-',{float,Anno,I}}) ->
pattern({op,_,'+',{float,Anno,I}}) ->
{value,ln(Anno),I};
pattern({bin,Anno,Grp}) ->
- Grp1 = pattern_list(Grp),
+ Grp1 = pattern_list(bin_expand_strings(Grp)),
{bin,ln(Anno),Grp1};
pattern({bin_element,Anno,Expr,Size,Type}) ->
Expr1 = pattern(Expr),
@@ -297,7 +297,7 @@ gexpr({map,Anno,E0,Fs0}) ->
Fs1 = map_fields(Fs0, fun gexpr/1),
{map,ln(Anno),E1,Fs1};
gexpr({bin,Anno,Flds0}) ->
- Flds = gexpr_list(Flds0),
+ Flds = gexpr_list(bin_expand_strings(Flds0)),
{bin,ln(Anno),Flds};
gexpr({bin_element,Anno,Expr0,Size0,Type}) ->
Expr = gexpr(Expr0),
@@ -506,7 +506,7 @@ expr({op,Anno,Op,L0,R0}, _Lc) ->
R1 = expr(R0, false), %They see the same variables
{op,ln(Anno),Op,[L1,R1]};
expr({bin,Anno,Grp}, _Lc) ->
- Grp1 = expr_list(Grp),
+ Grp1 = expr_list(bin_expand_strings(Grp)),
{bin,ln(Anno),Grp1};
expr({bin_element,Anno,Expr,Size,Type}, _Lc) ->
Expr1 = expr(Expr, false),
@@ -519,6 +519,19 @@ consify([A|As]) ->
{cons,0,A,consify(As)};
consify([]) -> {value,0,[]}.
+%% The debugger converts both strings "abc" and lists [67, 68, 69]
+%% into {value, Line, [67, 68, 69]}, making it impossible to later
+%% distingish one or the other inside binaries when evaluating. To
+%% avoid <<[67, 68, 69]>> from evaluating, we convert strings into
+%% chars to avoid the ambiguity.
+bin_expand_strings(Es) ->
+ lists:foldr(fun ({bin_element,Line,{string,_,S},Sz,Ts}, Es1) ->
+ lists:foldr(fun (C, Es2) ->
+ [{bin_element,Line,{char,Line,C},Sz,Ts}|Es2]
+ end, Es1, S);
+ (E, Es1) -> [E|Es1]
+ end, [], Es).
+
%% -type expr_list([Expression]) -> [Expression].
%% These expressions are processed "in parallel" for purposes of variable
%% definition etc.
diff --git a/lib/debugger/test/bs_bincomp_SUITE.erl b/lib/debugger/test/bs_bincomp_SUITE.erl
index 39e2240f2d..064e9567b3 100644
--- a/lib/debugger/test/bs_bincomp_SUITE.erl
+++ b/lib/debugger/test/bs_bincomp_SUITE.erl
@@ -66,6 +66,7 @@ end_per_group(_GroupName, Config) ->
byte_aligned(Config) when is_list(Config) ->
<<"abcdefg">> = << <<(X+32)>> || <<X>> <= <<"ABCDEFG">> >>,
+ <<"AxyzBxyzCxyz">> = << <<X, "xyz">> || <<X>> <= <<"ABC">> >>,
<<1:32/little,2:32/little,3:32/little,4:32/little>> =
<< <<X:32/little>> || <<X:32>> <= <<1:32,2:32,3:32,4:32>> >>,
<<1:32/little,2:32/little,3:32/little,4:32/little>> =
diff --git a/lib/stdlib/src/eval_bits.erl b/lib/stdlib/src/eval_bits.erl
index 80667023fb..631faa3be5 100644
--- a/lib/stdlib/src/eval_bits.erl
+++ b/lib/stdlib/src/eval_bits.erl
@@ -67,16 +67,20 @@ expr_grp([Field | FS], Bs0, Lf, Acc) ->
expr_grp([], Bs0, _Lf, Acc) ->
{value,Acc,Bs0}.
+eval_field({bin_element, _, {string, _, S}, {integer,_,8}, [integer,{unit,1},unsigned,big]}, Bs0, _Fun) ->
+ Latin1 = [C band 16#FF || C <- S],
+ {list_to_binary(Latin1),Bs0};
eval_field({bin_element, _, {string, _, S}, default, default}, Bs0, _Fun) ->
Latin1 = [C band 16#FF || C <- S],
{list_to_binary(Latin1),Bs0};
-eval_field({bin_element, Line, {string, _, S}, Size0, Options0}, Bs, _Fun) ->
- {_Size,[Type,_Unit,_Sign,Endian]} =
+eval_field({bin_element, Line, {string, _, S}, Size0, Options0}, Bs0, Fun) ->
+ {Size1,[Type,{unit,Unit},Sign,Endian]} =
make_bit_type(Line, Size0, Options0),
- Res = << <<(eval_exp_field1(C, no_size, no_unit,
- Type, Endian, no_sign))/binary>> ||
+ {value,Size,Bs1} = Fun(Size1, Bs0),
+ Res = << <<(eval_exp_field1(C, Size, Unit,
+ Type, Endian, Sign))/binary>> ||
C <- S >>,
- {Res,Bs};
+ {Res,Bs1};
eval_field({bin_element,Line,E,Size0,Options0}, Bs0, Fun) ->
{value,V,Bs1} = Fun(E, Bs0),
{Size1,[Type,{unit,Unit},Sign,Endian]} =