From e48f59aff695e55c0150664a6cc4f441c1636a16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 20 Mar 2018 07:27:36 +0100 Subject: Allow the match context identifier to be any term During compilation, the bs_save2 and bs_restore2 instructions contain a match context reference. That reference is the variable name that holds the match context. beam_clean assumes that the reference always is an atom, which is not a safe assumption since integers are legal variable names in Core Erlang. --- lib/compiler/src/beam_clean.erl | 2 +- lib/compiler/src/v3_codegen.erl | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl index 7ddf9fa2e2..955c128699 100644 --- a/lib/compiler/src/beam_clean.erl +++ b/lib/compiler/src/beam_clean.erl @@ -254,7 +254,7 @@ bs_restores([_|Is], Dict) -> bs_restores([], Dict) -> Dict. %% Pass 2. -bs_replace([{test,bs_start_match2,F,Live,[Src,Ctx],CtxR}|T], Dict, Acc) when is_atom(Ctx) -> +bs_replace([{test,bs_start_match2,F,Live,[Src,{context,Ctx}],CtxR}|T], Dict, Acc) -> Slots = case gb_trees:lookup(Ctx, Dict) of {value,Slots0} -> Slots0; none -> 0 diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl index a8f4926e55..8808c0a3b7 100644 --- a/lib/compiler/src/v3_codegen.erl +++ b/lib/compiler/src/v3_codegen.erl @@ -1162,7 +1162,7 @@ select_binary(#k_val_clause{val=#k_binary{segs=#k_var{name=V}},body=B, {Bis0,Aft,St1} = match_cg(B, Vf, Int0, St0#cg{ctx=V}), CtxReg = fetch_var(V, Int0), Live = max_reg(Bef#sr.reg), - Bis1 = [{test,bs_start_match2,{f,Tf},Live,[CtxReg,V],CtxReg}, + Bis1 = [{test,bs_start_match2,{f,Tf},Live,[CtxReg,{context,V}],CtxReg}, {bs_save2,CtxReg,{V,V}}|Bis0], Bis = finish_select_binary(Bis1), {Bis,Aft,St1#cg{ctx=OldCtx}}; @@ -1174,7 +1174,8 @@ select_binary(#k_val_clause{val=#k_binary{segs=#k_var{name=Ivar}},body=B, {Bis0,Aft,St1} = match_cg(B, Vf, Int0, 
St0#cg{ctx=Ivar}), CtxReg = fetch_var(Ivar, Int0), Live = max_reg(Bef#sr.reg), - Bis1 = [{test,bs_start_match2,{f,Tf},Live,[fetch_var(V, Bef),Ivar],CtxReg}, + Bis1 = [{test,bs_start_match2,{f,Tf},Live, + [fetch_var(V, Bef),{context,Ivar}],CtxReg}, {bs_save2,CtxReg,{Ivar,Ivar}}|Bis0], Bis = finish_select_binary(Bis1), {Bis,Aft,St1#cg{ctx=OldCtx}}. -- cgit v1.2.3 From 43a91c5e461e3fbec924e332f42fd69b81be34b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 22 Mar 2018 12:38:39 +0100 Subject: cerl_inline: Fix a name capture bug The way variables created by make_template() are used, it is necessary that the names are unique in the entire function. This has not happened to cause any problems in the past because all other compiler passes created atom variable names, not integer variable names. If other passes start to create integer variable names, this bug is exposed. --- lib/compiler/src/cerl_inline.erl | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/cerl_inline.erl b/lib/compiler/src/cerl_inline.erl index f5afa75b16..caff47dbcb 100644 --- a/lib/compiler/src/cerl_inline.erl +++ b/lib/compiler/src/cerl_inline.erl @@ -1822,6 +1822,14 @@ new_var(Env) -> Name = env__new_vname(Env), c_var(Name). +%% The way a template variable is used makes it necessary +%% to make sure that it is unique in the entire function. +%% Therefore, template variables are atoms with the prefix "@i". + +new_template_var(Env) -> + Name = env__new_tname(Env), + c_var(Name). + residualize_var(R, S) -> S1 = count_size(weight(var), S), {ref_to_var(R), st__set_var_referenced(R#ref.loc, S1)}. 
@@ -2183,7 +2191,7 @@ make_template(E, Vs0, Env0) -> T = make_data_skel(data_type(E), Ts), E1 = update_data(E, data_type(E), [hd(get_ann(T)) || T <- Ts]), - V = new_var(Env1), + V = new_template_var(Env1), Env2 = env__bind(var_name(V), E1, Env1), {set_ann(T, [V]), [V | Vs1], Env2}; false -> @@ -2198,7 +2206,7 @@ make_template(E, Vs0, Env0) -> Env2 = env__bind(V, E1, Env1), {T, Vs1, Env2}; _ -> - V = new_var(Env0), + V = new_template_var(Env0), Env1 = env__bind(var_name(V), E, Env0), {set_ann(V, [V]), [V | Vs0], Env1} end @@ -2564,6 +2572,11 @@ env__is_defined(Key, Env) -> env__new_vname(Env) -> rec_env:new_key(Env). +env__new_tname(Env) -> + rec_env:new_key(fun(I) -> + list_to_atom("@i"++integer_to_list(I)) + end, Env). + env__new_fname(A, N, Env) -> rec_env:new_key(fun (X) -> S = integer_to_list(X), -- cgit v1.2.3 From 9d2f5cde19cffca9a00b8fad8075bf160cc872d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 23 Mar 2018 11:51:28 +0100 Subject: Add cerl_trees:next_free_variable_name/1 --- lib/compiler/src/cerl_trees.erl | 109 ++++++++++++++++++++++++++++++++++++- lib/compiler/src/sys_core_fold.erl | 8 +++ 2 files changed, 116 insertions(+), 1 deletion(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/cerl_trees.erl b/lib/compiler/src/cerl_trees.erl index f30a0b33ac..c7a129b42c 100644 --- a/lib/compiler/src/cerl_trees.erl +++ b/lib/compiler/src/cerl_trees.erl @@ -22,7 +22,8 @@ -module(cerl_trees). -export([depth/1, fold/3, free_variables/1, get_label/1, label/1, label/2, - map/2, mapfold/3, mapfold/4, size/1, variables/1]). + map/2, mapfold/3, mapfold/4, next_free_variable_name/1, + size/1, variables/1]). -import(cerl, [alias_pat/1, alias_var/1, ann_c_alias/3, ann_c_apply/3, ann_c_binary/2, ann_c_bitstr/6, ann_c_call/4, @@ -507,6 +508,7 @@ mapfold_pairs(_, _, S, []) -> %% well-formed Core Erlang syntax tree. %% %% @see free_variables/1 +%% @see next_free_variable_name/1 -spec variables(cerl:cerl()) -> [cerl:var_name()]. 
@@ -519,6 +521,7 @@ variables(T) -> %% @doc Like variables/1, but only includes variables %% that are free in the tree. %% +%% @see next_free_variable_name/1 %% @see variables/1 -spec free_variables(cerl:cerl()) -> [cerl:var_name()]. @@ -678,6 +681,110 @@ var_list_names([V | Vs], A) -> var_list_names([], A) -> A. +%% --------------------------------------------------------------------- + +%% @spec next_free_variable_name(Tree::cerl()) -> var_name() +%% +%% var_name() = integer() +%% +%% @doc Returns an integer variable name higher than any other integer +%% variable name in the syntax tree. An exception is thrown if +%% Tree does not represent a well-formed Core Erlang +%% syntax tree. +%% +%% @see variables/1 +%% @see free_variables/1 + +-spec next_free_variable_name(cerl:cerl()) -> integer(). + +next_free_variable_name(T) -> + 1 + next_free(T, -1). + +next_free(T, Max) -> + case type(T) of + literal -> + Max; + var -> + case var_name(T) of + Int when is_integer(Int) -> + max(Int, Max); + _ -> + Max + end; + values -> + next_free_in_list(values_es(T), Max); + cons -> + next_free(cons_hd(T), next_free(cons_tl(T), Max)); + tuple -> + next_free_in_list(tuple_es(T), Max); + map -> + next_free_in_list([map_arg(T)|map_es(T)], Max); + map_pair -> + next_free_in_list([map_pair_op(T),map_pair_key(T), + map_pair_val(T)], Max); + 'let' -> + Max1 = next_free(let_body(T), Max), + Max2 = next_free_in_list(let_vars(T), Max1), + next_free(let_arg(T), Max2); + seq -> + next_free(seq_arg(T), + next_free(seq_body(T), Max)); + apply -> + next_free(apply_op(T), + next_free_in_list(apply_args(T), Max)); + call -> + next_free(call_module(T), + next_free(call_name(T), + next_free_in_list( + call_args(T), Max))); + primop -> + next_free_in_list(primop_args(T), Max); + 'case' -> + next_free(case_arg(T), + next_free_in_list(case_clauses(T), Max)); + clause -> + Max1 = next_free(clause_guard(T), + next_free(clause_body(T), Max)), + next_free_in_list(clause_pats(T), Max1); + alias -> + 
next_free(alias_var(T), + next_free(alias_pat(T), Max)); + 'fun' -> + next_free(fun_body(T), + next_free_in_list(fun_vars(T), Max)); + 'receive' -> + Max1 = next_free_in_list(receive_clauses(T), + next_free(receive_timeout(T), Max)), + next_free(receive_action(T), Max1); + 'try' -> + Max1 = next_free(try_body(T), Max), + Max2 = next_free_in_list(try_vars(T), Max1), + Max3 = next_free(try_handler(T), Max2), + Max4 = next_free_in_list(try_evars(T), Max3), + next_free(try_arg(T), Max4); + 'catch' -> + next_free(catch_body(T), Max); + binary -> + next_free_in_list(binary_segments(T), Max); + bitstr -> + next_free(bitstr_val(T), next_free(bitstr_size(T), Max)); + letrec -> + Max1 = next_free_in_defs(letrec_defs(T), Max), + Max2 = next_free(letrec_body(T), Max1), + next_free_in_list(letrec_vars(T), Max2); + module -> + next_free_in_defs(module_defs(T), Max) + end. + +next_free_in_list([H | T], Max) -> + next_free_in_list(T, next_free(H, Max)); +next_free_in_list([], Max) -> + Max. + +next_free_in_defs([{_, Post} | Ds], Max) -> + next_free_in_defs(Ds, next_free(Post, Max)); +next_free_in_defs([], Max) -> + Max. %% --------------------------------------------------------------------- diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl index a9bd363ee1..0354981562 100644 --- a/lib/compiler/src/sys_core_fold.erl +++ b/lib/compiler/src/sys_core_fold.erl @@ -119,6 +119,14 @@ module(#c_module{defs=Ds0}=Mod, Opts) -> function_1({#c_var{name={F,Arity}}=Name,B0}) -> try + %% Find a suitable starting value for the variable + %% counter. Note that this pass assumes that new_var_name/1 + %% returns a variable name distinct from any variable used in + %% the entire body of the function. We use integers as + %% variable names to avoid filling up the atom table when + %% compiling huge functions. + Count = cerl_trees:next_free_variable_name(B0), + put(new_var_num, Count), B = find_fixpoint(fun(Core) -> %% This must be a fun! 
expr(Core, value, sub_new()) -- cgit v1.2.3 From a5e80861faedc84d373ccda95ae6f8c7aff11bcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 20 Mar 2018 07:34:00 +0100 Subject: Avoid overflowing the atom table Use integer variable names instead of atoms in v3_core, sys_core_fold, and v3_kernel to avoid overflowing the atom table. It is a deliberate design decision to calculate the first free integer variable name (in sys_core_fold and v3_kernel) instead of somehow passing it from one pass to another. I don't want that kind of dependency between compiler passes. Also note that the next free variable name is not easily available after running the inliner. --- lib/compiler/src/sys_core_fold.erl | 14 +++++++++----- lib/compiler/src/v3_core.erl | 2 +- lib/compiler/src/v3_kernel.erl | 11 ++++++++--- 3 files changed, 18 insertions(+), 9 deletions(-) (limited to 'lib/compiler') diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl index 0354981562..395b6bd677 100644 --- a/lib/compiler/src/sys_core_fold.erl +++ b/lib/compiler/src/sys_core_fold.erl @@ -108,16 +108,20 @@ module(#c_module{defs=Ds0}=Mod, Opts) -> put(no_inline_list_funcs, not member(inline_list_funcs, Opts)), - case get(new_var_num) of - undefined -> put(new_var_num, 0); - _ -> ok - end, init_warnings(), Ds1 = [function_1(D) || D <- Ds0], + erase(new_var_num), erase(no_inline_list_funcs), {ok,Mod#c_module{defs=Ds1},get_warnings()}. function_1({#c_var{name={F,Arity}}=Name,B0}) -> + %% Find a suitable starting value for the variable counter. Note + %% that this pass assumes that new_var_name/1 returns a variable + %% name distinct from any variable used in the entire body of + %% the function. We use integers as variable names to avoid + %% filling up the atom table when compiling huge functions. + Count = cerl_trees:next_free_variable_name(B0), + put(new_var_num, Count), try %% Find a suitable starting value for the variable %% counter. 
Note that this pass assumes that new_var_name/1 @@ -2162,7 +2166,7 @@ make_var(A) -> make_var_name() -> N = get(new_var_num), put(new_var_num, N+1), - list_to_atom("@f"++integer_to_list(N)). + N. letify(Bs, Body) -> Ann = cerl:get_ann(Body), diff --git a/lib/compiler/src/v3_core.erl b/lib/compiler/src/v3_core.erl index 6029b91cdc..df0cc3684f 100644 --- a/lib/compiler/src/v3_core.erl +++ b/lib/compiler/src/v3_core.erl @@ -2005,7 +2005,7 @@ new_fun_name(Type, #core{fcount=C}=St) -> %% new_var_name(State) -> {VarName,State}. new_var_name(#core{vcount=C}=St) -> - {list_to_atom("@c" ++ integer_to_list(C)),St#core{vcount=C + 1}}. + {C,St#core{vcount=C + 1}}. %% new_var(State) -> {{var,Name},State}. %% new_var(LineAnno, State) -> {{var,Name},State}. diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl index dfe8d26afb..4e3ceedbc0 100644 --- a/lib/compiler/src/v3_kernel.erl +++ b/lib/compiler/src/v3_kernel.erl @@ -157,7 +157,13 @@ include_attribute(_) -> true. function({#c_var{name={F,Arity}=FA},Body}, St0) -> %%io:format("~w/~w~n", [F,Arity]), try - St1 = St0#kern{func=FA,ff=undefined,vcount=0,fcount=0,ds=cerl_sets:new()}, + %% Find a suitable starting value for the variable counter. Note + %% that this pass assumes that new_var_name/1 returns a variable + %% name distinct from any variable used in the entire body of + %% the function. We use integers as variable names to avoid + %% filling up the atom table when compiling huge functions. + Count = cerl_trees:next_free_variable_name(Body), + St1 = St0#kern{func=FA,ff=undefined,vcount=Count,fcount=0,ds=cerl_sets:new()}, {#ifun{anno=Ab,vars=Kvs,body=B0},[],St2} = expr(Body, new_sub(), St1), {B1,_,St3} = ubody(B0, return, St2), %%B1 = B0, St3 = St2, %Null second pass @@ -168,7 +174,6 @@ function({#c_var{name={F,Arity}=FA},Body}, St0) -> erlang:raise(Class, Error, Stack) end. - %% body(Cexpr, Sub, State) -> {Kexpr,[PreKepxr],State}. %% Do the main sequence of a body. 
A body ends in an atomic value or %% values. Must check if vector first so do expr. @@ -1356,7 +1361,7 @@ new_fun_name(Type, #kern{func={F,Arity},fcount=C}=St) -> %% new_var_name(State) -> {VarName,State}. new_var_name(#kern{vcount=C}=St) -> - {list_to_atom("@k" ++ integer_to_list(C)),St#kern{vcount=C+1}}. + {C,St#kern{vcount=C+1}}. %% new_var(State) -> {#k_var{},State}. -- cgit v1.2.3