diff options
author | José Valim <[email protected]> | 2016-07-16 22:24:50 +0200 |
---|---|---|
committer | José Valim <[email protected]> | 2016-08-04 13:22:14 +0200 |
commit | a5fcd4f26969a768950dc643eeed2fdb41a5dc41 (patch) | |
tree | c20bcd4a5dfeea6ccdc6a05ebd0eeaaeb0854a4d | |
parent | ddfae156c2b21d5266bd6eb82bf9ca7c508226fd (diff) | |
download | otp-a5fcd4f26969a768950dc643eeed2fdb41a5dc41.tar.gz otp-a5fcd4f26969a768950dc643eeed2fdb41a5dc41.tar.bz2 otp-a5fcd4f26969a768950dc643eeed2fdb41a5dc41.zip |
Move expansion of strings in binaries to v3_core
This speeds up the compilation of binary literals
with string values in them. For example, compiling
a file with a ~340kB binary would yield the following
times by the compiler:
Compiling "foo"
parse_module : 0.130 s 5327.6 kB
transform_module : 0.000 s 5327.6 kB
lint_module : 0.011 s 5327.8 kB
expand_module : 0.508 s 71881.2 kB
v3_core : 0.463 s 11.5 kB
Notice the increase in memory and processing time
in expand_module and v3_core. This happened because
expand_module would expand the string in binaries
into chars. For example, the binary <<"foo">>, which
is represented as
{bin, 1, [
{bin_element, 1, {string, 1, "foo"}, default, default}
]}
would be converted to
{bin, 1, [
{bin_element, 1, {char, 1, $f}, default, default},
{bin_element, 1, {char, 1, $o}, default, default},
{bin_element, 1, {char, 1, $o}, default, default}
]}
However, v3_core would then traverse all of those
characters and convert them back into an actual binary,
since the whole expression is a literal value.
This patch addresses this issue by moving the expansion
of strings into chars to v3_core, performing it only if a
literal value cannot be built. This reduces the compilation
time of the file mentioned above to the values below:
Compiling "bar"
parse_module : 0.134 s 5327.6 kB
transform_module : 0.000 s 5327.6 kB
lint_module : 0.005 s 5327.8 kB
expand_module : 0.000 s 5328.7 kB
v3_core : 0.013 s 11.2 kB
-rw-r--r-- | lib/compiler/src/sys_pre_expand.erl | 18 | ||||
-rw-r--r-- | lib/compiler/src/v3_core.erl | 24 | ||||
-rw-r--r-- | lib/compiler/test/bs_bincomp_SUITE.erl | 1 | ||||
-rw-r--r-- | lib/compiler/test/bs_utf_SUITE.erl | 1 | ||||
-rw-r--r-- | lib/debugger/src/dbg_iload.erl | 19 | ||||
-rw-r--r-- | lib/debugger/test/bs_bincomp_SUITE.erl | 1 | ||||
-rw-r--r-- | lib/stdlib/src/eval_bits.erl | 14 |
7 files changed, 53 insertions, 25 deletions
diff --git a/lib/compiler/src/sys_pre_expand.erl b/lib/compiler/src/sys_pre_expand.erl index 7ab4e1845c..f996a2d2d7 100644 --- a/lib/compiler/src/sys_pre_expand.erl +++ b/lib/compiler/src/sys_pre_expand.erl @@ -520,9 +520,8 @@ new_fun_name(#expand{func=F,arity=A,fcount=I}=St, FName) -> %% pattern_bin([Element], State) -> {[Element],[Variable],[UsedVar],State}. -pattern_bin(Es0, St) -> - Es1 = bin_expand_strings(Es0), - foldr(fun (E, Acc) -> pattern_element(E, Acc) end, {[],St}, Es1). +pattern_bin(Es, St) -> + foldr(fun (E, Acc) -> pattern_element(E, Acc) end, {[],St}, Es). pattern_element({bin_element,Line,Expr0,Size0,Type0}, {Es,St0}) -> {Expr1,St1} = pattern(Expr0, St0), @@ -558,9 +557,8 @@ coerce_to_float(E, _) -> E. %% expr_bin([Element], State) -> {[Element],State}. -expr_bin(Es0, St) -> - Es1 = bin_expand_strings(Es0), - foldr(fun (E, Acc) -> bin_element(E, Acc) end, {[],St}, Es1). +expr_bin(Es, St) -> + foldr(fun (E, Acc) -> bin_element(E, Acc) end, {[],St}, Es). bin_element({bin_element,Line,Expr,Size,Type}, {Es,St0}) -> {Expr1,St1} = expr(Expr, St0), @@ -570,14 +568,6 @@ bin_element({bin_element,Line,Expr,Size,Type}, {Es,St0}) -> {Size2,Type1} = make_bit_type(Line, Size1, Type), {[{bin_element,Line,Expr1,Size2,Type1}|Es],St2}. -bin_expand_strings(Es) -> - foldr(fun ({bin_element,Line,{string,_,S},Sz,Ts}, Es1) -> - foldr(fun (C, Es2) -> - [{bin_element,Line,{char,Line,C},Sz,Ts}|Es2] - end, Es1, S); - (E, Es1) -> [E|Es1] - end, [], Es). - %% new_var_name(State) -> {VarName,State}. 
new_var_name(St) -> diff --git a/lib/compiler/src/v3_core.erl b/lib/compiler/src/v3_core.erl index d71411de80..634ec68736 100644 --- a/lib/compiler/src/v3_core.erl +++ b/lib/compiler/src/v3_core.erl @@ -901,7 +901,7 @@ try_after(As, St0) -> expr_bin(Es0, Anno, St0) -> case constant_bin(Es0) of error -> - {Es,Eps,St} = expr_bin_1(Es0, St0), + {Es,Eps,St} = expr_bin_1(bin_expand_strings(Es0), St0), {#ibinary{anno=#a{anno=Anno},segments=Es},Eps,St}; Bin -> {#c_literal{anno=Anno,val=Bin},[],St0} @@ -923,7 +923,8 @@ constant_bin(Es) -> constant_bin_1(Es) -> verify_suitable_fields(Es), EmptyBindings = erl_eval:new_bindings(), - EvalFun = fun({integer,_,I}, B) -> {value,I,B}; + EvalFun = fun({string,_,S}, B) -> {value,S,B}; + ({integer,_,I}, B) -> {value,I,B}; ({char,_,C}, B) -> {value,C,B}; ({float,_,F}, B) -> {value,F,B}; ({atom,_,undefined}, B) -> {value,undefined,B} @@ -944,6 +945,9 @@ verify_suitable_fields([{bin_element,_,Val,SzTerm,Opts}|Es]) -> end, {unit,Unit} = keyfind(unit, 1, Opts), case {SzTerm,Val} of + {{atom,_,undefined},{string,_,_}} -> + %% UTF-8/16/32. + ok; {{atom,_,undefined},{char,_,_}} -> %% UTF-8/16/32. ok; @@ -983,6 +987,14 @@ count_bits(Int) -> count_bits_1(0, Bits) -> Bits; count_bits_1(Int, Bits) -> count_bits_1(Int bsr 64, Bits+64). +bin_expand_strings(Es) -> + foldr(fun ({bin_element,Line,{string,_,S},Sz,Ts}, Es1) -> + foldr(fun (C, Es2) -> + [{bin_element,Line,{char,Line,C},Sz,Ts}|Es2] + end, Es1, S); + (E, Es1) -> [E|Es1] + end, [], Es). + expr_bin_1(Es, St) -> foldr(fun (E, {Ces,Esp,St0}) -> {Ce,Ep,St1} = bitstr(E, St0), @@ -1394,6 +1406,9 @@ bc_elem_size({bin,_,El}, St0) -> bc_elem_size(_, _) -> throw(impossible). 
+bc_elem_size_1([{bin_element,_,{string,_,String},{integer,_,N},Flags}|Es], Bits, Vars) -> + {unit,U} = keyfind(unit, 1, Flags), + bc_elem_size_1(Es, Bits+U*N*length(String), Vars); bc_elem_size_1([{bin_element,_,_,{integer,_,N},Flags}|Es], Bits, Vars) -> {unit,U} = keyfind(unit, 1, Flags), bc_elem_size_1(Es, Bits+U*N, Vars); @@ -1513,6 +1528,9 @@ bc_list_length(_, _) -> bc_bin_size({bin,_,Els}) -> bc_bin_size_1(Els, 0). +bc_bin_size_1([{bin_element,_,{string,_,String},{integer,_,Sz},Flags}|Els], N) -> + {unit,U} = keyfind(unit, 1, Flags), + bc_bin_size_1(Els, N+U*Sz*length(String)); bc_bin_size_1([{bin_element,_,_,{integer,_,Sz},Flags}|Els], N) -> {unit,U} = keyfind(unit, 1, Flags), bc_bin_size_1(Els, N+U*Sz); @@ -1736,7 +1754,7 @@ pat_alias_map_pairs_1([]) -> []. %% pat_bin([BinElement], State) -> [BinSeg]. -pat_bin(Ps, St) -> [pat_segment(P, St) || P <- Ps]. +pat_bin(Ps, St) -> [pat_segment(P, St) || P <- bin_expand_strings(Ps)]. pat_segment({bin_element,L,Val,Size,[Type,{unit,Unit}|Flags]}, St) -> Anno = lineno_anno(L, St), diff --git a/lib/compiler/test/bs_bincomp_SUITE.erl b/lib/compiler/test/bs_bincomp_SUITE.erl index 4743821337..dd1d245f88 100644 --- a/lib/compiler/test/bs_bincomp_SUITE.erl +++ b/lib/compiler/test/bs_bincomp_SUITE.erl @@ -56,6 +56,7 @@ end_per_group(_GroupName, Config) -> byte_aligned(Config) when is_list(Config) -> cs_init(), <<"abcdefg">> = cs(<< <<(X+32)>> || <<X>> <= <<"ABCDEFG">> >>), + <<"AxyzBxyzCxyz">> = cs(<< <<X, "xyz">> || <<X>> <= <<"ABC">> >>), <<1:32/little,2:32/little,3:32/little,4:32/little>> = cs(<< <<X:32/little>> || <<X:32>> <= <<1:32,2:32,3:32,4:32>> >>), cs(<<1:32/little,2:32/little,3:32/little,4:32/little>> = diff --git a/lib/compiler/test/bs_utf_SUITE.erl b/lib/compiler/test/bs_utf_SUITE.erl index c894041f72..ef3fc54b37 100644 --- a/lib/compiler/test/bs_utf_SUITE.erl +++ b/lib/compiler/test/bs_utf_SUITE.erl @@ -235,6 +235,7 @@ utf32_to_unicode(<<>>) -> []. 
literals(Config) when is_list(Config) -> abc_utf8 = match_literal(<<"abc"/utf8>>), abc_utf8 = match_literal(<<$a,$b,$c>>), + abc_utf8 = match_literal(<<$a/utf8,$b/utf8,$c/utf8>>), abc_utf16be = match_literal(<<"abc"/utf16>>), abc_utf16be = match_literal(<<$a:16,$b:16,$c:16>>), diff --git a/lib/debugger/src/dbg_iload.erl b/lib/debugger/src/dbg_iload.erl index 369b456524..f83684b605 100644 --- a/lib/debugger/src/dbg_iload.erl +++ b/lib/debugger/src/dbg_iload.erl @@ -216,7 +216,7 @@ pattern({op,_,'-',{float,Anno,I}}) -> pattern({op,_,'+',{float,Anno,I}}) -> {value,ln(Anno),I}; pattern({bin,Anno,Grp}) -> - Grp1 = pattern_list(Grp), + Grp1 = pattern_list(bin_expand_strings(Grp)), {bin,ln(Anno),Grp1}; pattern({bin_element,Anno,Expr,Size,Type}) -> Expr1 = pattern(Expr), @@ -297,7 +297,7 @@ gexpr({map,Anno,E0,Fs0}) -> Fs1 = map_fields(Fs0, fun gexpr/1), {map,ln(Anno),E1,Fs1}; gexpr({bin,Anno,Flds0}) -> - Flds = gexpr_list(Flds0), + Flds = gexpr_list(bin_expand_strings(Flds0)), {bin,ln(Anno),Flds}; gexpr({bin_element,Anno,Expr0,Size0,Type}) -> Expr = gexpr(Expr0), @@ -506,7 +506,7 @@ expr({op,Anno,Op,L0,R0}, _Lc) -> R1 = expr(R0, false), %They see the same variables {op,ln(Anno),Op,[L1,R1]}; expr({bin,Anno,Grp}, _Lc) -> - Grp1 = expr_list(Grp), + Grp1 = expr_list(bin_expand_strings(Grp)), {bin,ln(Anno),Grp1}; expr({bin_element,Anno,Expr,Size,Type}, _Lc) -> Expr1 = expr(Expr, false), @@ -519,6 +519,19 @@ consify([A|As]) -> {cons,0,A,consify(As)}; consify([]) -> {value,0,[]}. +%% The debugger converts both strings "abc" and lists [67, 68, 69] +%% into {value, Line, [67, 68, 69]}, making it impossible to later +%% distingish one or the other inside binaries when evaluating. To +%% avoid <<[67, 68, 69]>> from evaluating, we convert strings into +%% chars to avoid the ambiguity. 
+bin_expand_strings(Es) -> + lists:foldr(fun ({bin_element,Line,{string,_,S},Sz,Ts}, Es1) -> + lists:foldr(fun (C, Es2) -> + [{bin_element,Line,{char,Line,C},Sz,Ts}|Es2] + end, Es1, S); + (E, Es1) -> [E|Es1] + end, [], Es). + %% -type expr_list([Expression]) -> [Expression]. %% These expressions are processed "in parallel" for purposes of variable %% definition etc. diff --git a/lib/debugger/test/bs_bincomp_SUITE.erl b/lib/debugger/test/bs_bincomp_SUITE.erl index 39e2240f2d..064e9567b3 100644 --- a/lib/debugger/test/bs_bincomp_SUITE.erl +++ b/lib/debugger/test/bs_bincomp_SUITE.erl @@ -66,6 +66,7 @@ end_per_group(_GroupName, Config) -> byte_aligned(Config) when is_list(Config) -> <<"abcdefg">> = << <<(X+32)>> || <<X>> <= <<"ABCDEFG">> >>, + <<"AxyzBxyzCxyz">> = << <<X, "xyz">> || <<X>> <= <<"ABC">> >>, <<1:32/little,2:32/little,3:32/little,4:32/little>> = << <<X:32/little>> || <<X:32>> <= <<1:32,2:32,3:32,4:32>> >>, <<1:32/little,2:32/little,3:32/little,4:32/little>> = diff --git a/lib/stdlib/src/eval_bits.erl b/lib/stdlib/src/eval_bits.erl index 80667023fb..631faa3be5 100644 --- a/lib/stdlib/src/eval_bits.erl +++ b/lib/stdlib/src/eval_bits.erl @@ -67,16 +67,20 @@ expr_grp([Field | FS], Bs0, Lf, Acc) -> expr_grp([], Bs0, _Lf, Acc) -> {value,Acc,Bs0}. 
+eval_field({bin_element, _, {string, _, S}, {integer,_,8}, [integer,{unit,1},unsigned,big]}, Bs0, _Fun) -> + Latin1 = [C band 16#FF || C <- S], + {list_to_binary(Latin1),Bs0}; eval_field({bin_element, _, {string, _, S}, default, default}, Bs0, _Fun) -> Latin1 = [C band 16#FF || C <- S], {list_to_binary(Latin1),Bs0}; -eval_field({bin_element, Line, {string, _, S}, Size0, Options0}, Bs, _Fun) -> - {_Size,[Type,_Unit,_Sign,Endian]} = +eval_field({bin_element, Line, {string, _, S}, Size0, Options0}, Bs0, Fun) -> + {Size1,[Type,{unit,Unit},Sign,Endian]} = make_bit_type(Line, Size0, Options0), - Res = << <<(eval_exp_field1(C, no_size, no_unit, - Type, Endian, no_sign))/binary>> || + {value,Size,Bs1} = Fun(Size1, Bs0), + Res = << <<(eval_exp_field1(C, Size, Unit, + Type, Endian, Sign))/binary>> || C <- S >>, - {Res,Bs}; + {Res,Bs1}; eval_field({bin_element,Line,E,Size0,Options0}, Bs0, Fun) -> {value,V,Bs1} = Fun(E, Bs0), {Size1,[Type,{unit,Unit},Sign,Endian]} = |