From 8aa4ff517c3dcc119ca593616875195895301189 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 19 Feb 2016 07:51:35 +0100 Subject: sys_core_fold: Optimize clause/4 Save work the *extremely* common case that the guard is a literal. --- lib/compiler/src/sys_core_fold.erl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'lib/compiler/src') diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl index 43ce9a7172..b78507c47e 100644 --- a/lib/compiler/src/sys_core_fold.erl +++ b/lib/compiler/src/sys_core_fold.erl @@ -1081,12 +1081,16 @@ is_atom_or_var(_) -> false. clause(#c_clause{pats=Ps0,guard=G0,body=B0}=Cl, Cexpr, Ctxt, Sub0) -> {Ps1,Sub1} = pattern_list(Ps0, Sub0), Sub2 = update_types(Cexpr, Ps1, Sub1), - GSub = case {Cexpr,Ps1} of - {#c_var{name='_'},_} -> + GSub = case {Cexpr,Ps1,G0} of + {_,_,#c_literal{}} -> + %% No need for substitution tricks when the guard + %% does not contain any variables. + Sub2; + {#c_var{name='_'},_,_} -> %% In a 'receive', Cexpr is the variable '_', which represents the %% message being matched. We must NOT do any extra substiutions. Sub2; - {#c_var{},[#c_var{}=Var]} -> + {#c_var{},[#c_var{}=Var],_} -> %% The idea here is to optimize expressions such as %% %% case A of A -> ... -- cgit v1.2.3 From 6a7d8a49f0f33844a9b94fceb35079ac1b24e043 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 18 Feb 2016 16:20:52 +0100 Subject: sys_core_fold: Introduce var_list/2 As a preparation for checking binary patterns we will add var_list/2 that will work as pattern_list/2 but is guaranteed not to throw an exception. That way, we will only have to use try...catch for the few remaining calls to pattern_list/2. --- lib/compiler/src/sys_core_fold.erl | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'lib/compiler/src') diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl index b78507c47e..e134a37f22 100644 --- a/lib/compiler/src/sys_core_fold.erl +++ b/lib/compiler/src/sys_core_fold.erl @@ -277,7 +277,7 @@ expr(#c_fun{}=Fun, effect, _) -> add_warning(Fun, useless_building), void(); expr(#c_fun{vars=Vs0,body=B0}=Fun, Ctxt0, Sub0) -> - {Vs1,Sub1} = pattern_list(Vs0, Sub0), + {Vs1,Sub1} = var_list(Vs0, Sub0), Ctxt = case Ctxt0 of {letrec,Ctxt1} -> Ctxt1; value -> value @@ -420,13 +420,13 @@ expr(#c_try{anno=A,arg=E0,vars=Vs0,body=B0,evars=Evs0,handler=H0}=Try, _, Sub0) %% Here is the general try/catch construct outside of guards. %% We can remove try if the value is simple and replace it with a let. E1 = body(E0, value, Sub0), - {Vs1,Sub1} = pattern_list(Vs0, Sub0), + {Vs1,Sub1} = var_list(Vs0, Sub0), B1 = body(B0, value, Sub1), case is_safe_simple(E1, Sub0) of true -> expr(#c_let{anno=A,vars=Vs1,arg=E1,body=B1}, value, Sub0); false -> - {Evs1,Sub2} = pattern_list(Evs0, Sub0), + {Evs1,Sub2} = var_list(Evs0, Sub0), H1 = body(H0, value, Sub2), Try#c_try{arg=E1,vars=Vs1,body=B1,evars=Evs1,handler=H1} end. @@ -1124,7 +1124,7 @@ clause(#c_clause{pats=Ps0,guard=G0,body=B0}=Cl, Cexpr, Ctxt, Sub0) -> %% the unsubstituted variables and values. let_substs(Vs0, As0, Sub0) -> - {Vs1,Sub1} = pattern_list(Vs0, Sub0), + {Vs1,Sub1} = var_list(Vs0, Sub0), {Vs2,As1,Ss} = let_substs_1(Vs1, As0, Sub1), Sub2 = sub_add_scope([V || #c_var{name=V} <- Vs2], Sub1), {Vs2,As1, @@ -1224,6 +1224,16 @@ pattern_list(Ps, Sub) -> pattern_list(Ps, Sub, Sub). pattern_list(Ps0, Isub, Osub0) -> mapfoldl(fun (P, Osub) -> pattern(P, Isub, Osub) end, Osub0, Ps0). +%% var_list([Var], InSub) -> {Pattern,OutSub}. +%% Works like pattern_list/2 but only accept variables and is +%% guaranteed not to throw an exception. + +var_list(Vs, Sub0) -> + mapfoldl(fun (#c_var{}=V, Sub) -> + pattern(V, Sub, Sub) + end, Sub0, Vs). + + %% is_subst(Expr) -> true | false. %% Test whether an expression is a suitable substitution. @@ -2255,11 +2265,11 @@ move_let_into_expr(#c_let{vars=InnerVs0,body=InnerBody0}=Inner, %% Arg = body(Arg0, Sub0), ScopeSub0 = sub_subst_scope(Sub0#sub{t=#{}}), - {OuterVs,ScopeSub} = pattern_list(OuterVs0, ScopeSub0), + {OuterVs,ScopeSub} = var_list(OuterVs0, ScopeSub0), OuterBody = body(OuterBody0, ScopeSub), - {InnerVs,Sub} = pattern_list(InnerVs0, Sub0), + {InnerVs,Sub} = var_list(InnerVs0, Sub0), InnerBody = body(InnerBody0, Sub), Outer#c_let{vars=OuterVs,arg=Arg, body=Inner#c_let{vars=InnerVs,arg=OuterBody,body=InnerBody}}; @@ -2599,7 +2609,7 @@ move_case_into_arg(#c_case{arg=#c_let{vars=OuterVars0,arg=OuterArg, %% in case of end %% ScopeSub0 = sub_subst_scope(Sub#sub{t=#{}}), - {OuterVars,ScopeSub} = pattern_list(OuterVars0, ScopeSub0), + {OuterVars,ScopeSub} = var_list(OuterVars0, ScopeSub0), InnerArg = body(InnerArg0, ScopeSub), Outer#c_let{vars=OuterVars,arg=OuterArg, body=Inner#c_case{arg=InnerArg,clauses=InnerClauses}}; -- cgit v1.2.3 From b9f0b1259da6a26982855f490ba31a0031fca247 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 23 Feb 2016 08:05:45 +0100 Subject: v3_core: Include line number annotations in binary patterns We will need them when we start to produce warnings for patterns that can't match. --- lib/compiler/src/v3_core.erl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib/compiler/src') diff --git a/lib/compiler/src/v3_core.erl b/lib/compiler/src/v3_core.erl index 72649e5c9f..68c9f964d8 100644 --- a/lib/compiler/src/v3_core.erl +++ b/lib/compiler/src/v3_core.erl @@ -1653,10 +1653,12 @@ pat_alias_map_pairs_1([]) -> []. pat_bin(Ps, St) -> [pat_segment(P, St) || P <- Ps]. -pat_segment({bin_element,_,Val,Size,[Type,{unit,Unit}|Flags]}, St) -> +pat_segment({bin_element,L,Val,Size,[Type,{unit,Unit}|Flags]}, St) -> + Anno = lineno_anno(L, St), {Pval,[],St1} = pattern(Val,St), {Psize,[],_St2} = pattern(Size,St1), - #c_bitstr{val=Pval,size=Psize, + #c_bitstr{anno=Anno, + val=Pval,size=Psize, unit=#c_literal{val=Unit}, type=#c_literal{val=Type}, flags=#c_literal{val=Flags}}. -- cgit v1.2.3 From 0d73923519dc9972a1cf31a5927692f51dbd7d8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 22 Feb 2016 07:17:29 +0100 Subject: Produce warnings for binary patterns that will never match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Binary matching can be confusing. For example: 1> <<-1>> = <<-1>>. ** exception error: no match of right hand side value <<"ΓΏ">> 2> When constructing binaries, the value will be masked to fit in the binary segment. But no such masking happens when matching binaries. One solution that we considered was to do the same masking when matching. We have rejected that solution for several reasons: * Masking in construction is highly controversial and by some people considered a bad design decision. * While masking of unsigned numbers can be understood, masking of signed numbers it not easy to understand. * Then there is the question of backward compatibility. Adding masking to matching would mean that clauses that did not match earlier would start to match. That means that code that has never been tested will be executed. Code that has not been tested will usually not work. Therefore, we have decided to warn for binary patterns that cannot possibly match. While we are it, we will also warn for the following example where size for a binary segment is invalid: bad_size(Bin) -> BadSize = bad_size, <<42:BadSize>> = Bin. That example would crash the HiPE compiler because the BEAM compiler would generate a bs_get_integer2 instruction with an invalid size field. We can avoid that crash if sys_core_fold not only warns for bad binary pattern, but also removes the clauses that will not match. Reported-by: http://bugs.erlang.org/browse/ERL-44 Reported-by: Kostis Sagonas --- lib/compiler/src/sys_core_fold.erl | 210 +++++++++++++++++++++++++++++++------ 1 file changed, 179 insertions(+), 31 deletions(-) (limited to 'lib/compiler/src') diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl index e134a37f22..ab67c8164b 100644 --- a/lib/compiler/src/sys_core_fold.erl +++ b/lib/compiler/src/sys_core_fold.erl @@ -1078,8 +1078,17 @@ is_atom_or_var(_) -> false. %% clause(Clause, Cepxr, Context, Sub) -> Clause. -clause(#c_clause{pats=Ps0,guard=G0,body=B0}=Cl, Cexpr, Ctxt, Sub0) -> - {Ps1,Sub1} = pattern_list(Ps0, Sub0), +clause(#c_clause{pats=Ps0}=Cl, Cexpr, Ctxt, Sub0) -> + try pattern_list(Ps0, Sub0) of + {Ps1,Sub1} -> + clause_1(Cl, Ps1, Cexpr, Ctxt, Sub1) + catch + nomatch -> + Cl#c_clause{anno=[compiler_generated], + guard=#c_literal{val=false}} + end. + +clause_1(#c_clause{guard=G0,body=B0}=Cl, Ps1, Cexpr, Ctxt, Sub1) -> Sub2 = update_types(Cexpr, Ps1, Sub1), GSub = case {Cexpr,Ps1,G0} of {_,_,#c_literal{}} -> @@ -1210,14 +1219,16 @@ bin_pattern_list(Ps0, Isub, Osub0) -> {Ps,{_,Osub}} = mapfoldl(fun bin_pattern/2, {Isub,Osub0}, Ps0), {Ps,Osub}. -bin_pattern(#c_bitstr{val=E0,size=Size0}=Pat, {Isub0,Osub0}) -> +bin_pattern(#c_bitstr{val=E0,size=Size0}=Pat0, {Isub0,Osub0}) -> Size1 = expr(Size0, Isub0), {E1,Osub} = pattern(E0, Isub0, Osub0), Isub = case E0 of #c_var{} -> sub_set_var(E0, E1, Isub0); _ -> Isub0 end, - {Pat#c_bitstr{val=E1,size=Size1},{Isub,Osub}}. + Pat = Pat0#c_bitstr{val=E1,size=Size1}, + bin_pat_warn(Pat), + {Pat,{Isub,Osub}}. pattern_list(Ps, Sub) -> pattern_list(Ps, Sub, Sub). @@ -1234,6 +1245,106 @@ var_list(Vs, Sub0) -> end, Sub0, Vs). +%%% +%%% Generate warnings for binary patterns that will not match. +%%% + +bin_pat_warn(#c_bitstr{type=#c_literal{val=Type}, + val=Val0, + size=#c_literal{val=Sz}, + unit=#c_literal{val=Unit}, + flags=Fl}=Pat) -> + case {Type,Sz} of + {_,_} when is_integer(Sz), Sz >= 0 -> ok; + {binary,all} -> ok; + {utf8,undefined} -> ok; + {utf16,undefined} -> ok; + {utf32,undefined} -> ok; + {_,_} -> + add_warning(Pat, {nomatch_bit_syntax_size,Sz}), + throw(nomatch) + end, + case {Type,Val0} of + {integer,#c_literal{val=Val}} when is_integer(Val) -> + Signedness = signedness(Fl), + TotalSz = Sz * Unit, + bit_pat_warn_int(Val, TotalSz, Signedness, Pat); + {float,#c_literal{val=Val}} when is_float(Val) -> + ok; + {utf8,#c_literal{val=Val}} when is_integer(Val) -> + bit_pat_warn_unicode(Val, Pat); + {utf16,#c_literal{val=Val}} when is_integer(Val) -> + bit_pat_warn_unicode(Val, Pat); + {utf32,#c_literal{val=Val}} when is_integer(Val) -> + bit_pat_warn_unicode(Val, Pat); + {_,#c_literal{val=Val}} -> + add_warning(Pat, {nomatch_bit_syntax_type,Val,Type}), + throw(nomatch); + {_,_} -> + ok + end; +bin_pat_warn(#c_bitstr{type=#c_literal{val=Type},val=Val0,flags=Fl}=Pat) -> + %% Size is variable. Not much that we can check. + case {Type,Val0} of + {integer,#c_literal{val=Val}} when is_integer(Val) -> + case signedness(Fl) of + unsigned when Val < 0 -> + add_warning(Pat, {nomatch_bit_syntax_unsigned,Val}), + throw(nomatch); + _ -> + ok + end; + {float,#c_literal{val=Val}} when is_float(Val) -> + ok; + {_,#c_literal{val=Val}} -> + add_warning(Pat, {nomatch_bit_syntax_type,Val,Type}), + throw(nomatch); + {_,_} -> + ok + end. + +bit_pat_warn_int(Val, 0, signed, Pat) -> + if + Val =:= 0 -> + ok; + true -> + add_warning(Pat, {nomatch_bit_syntax_truncated,signed,Val,0}), + throw(nomatch) + end; +bit_pat_warn_int(Val, Sz, signed, Pat) -> + if + Val < 0, Val bsr (Sz - 1) =/= -1 -> + add_warning(Pat, {nomatch_bit_syntax_truncated,signed,Val,Sz}), + throw(nomatch); + Val > 0, Val bsr (Sz - 1) =/= 0 -> + add_warning(Pat, {nomatch_bit_syntax_truncated,signed,Val,Sz}), + throw(nomatch); + true -> + ok + end; +bit_pat_warn_int(Val, _Sz, unsigned, Pat) when Val < 0 -> + add_warning(Pat, {nomatch_bit_syntax_unsigned,Val}), + throw(nomatch); +bit_pat_warn_int(Val, Sz, unsigned, Pat) -> + if + Val bsr Sz =:= 0 -> + ok; + true -> + add_warning(Pat, {nomatch_bit_syntax_truncated,unsigned,Val,Sz}), + throw(nomatch) + end. + +bit_pat_warn_unicode(U, _Pat) when 0 =< U, U =< 16#10FFFF -> + ok; +bit_pat_warn_unicode(U, Pat) -> + add_warning(Pat, {nomatch_bit_syntax_unicode,U}), + throw(nomatch). + +signedness(#c_literal{val=Flags}) -> + [S] = [F || F <- Flags, F =:= signed orelse F =:= unsigned], + S. + + %% is_subst(Expr) -> true | false. %% Test whether an expression is a suitable substitution. @@ -2285,39 +2396,49 @@ move_let_into_expr(#c_let{vars=Lvs0,body=Lbody0}=Let, case {TwoClauses,is_failing_clause(Ca0),is_failing_clause(Cb0)} of {true,false,true} -> %% let = case of - %% -> ; - %% -> erlang:error(...) + %% -> ; + %% -> erlang:error(...) %% end %% in %% %% ==> %% %% case of - %% -> + %% -> %% let = %% in ; - %% -> erlang:error(...) + %% -> erlang:error(...) %% end Cexpr = body(Cexpr0, Sub0), - CaVars0 = Ca0#c_clause.pats, + CaPats0 = Ca0#c_clause.pats, G0 = Ca0#c_clause.guard, B0 = Ca0#c_clause.body, ScopeSub0 = sub_subst_scope(Sub0#sub{t=#{}}), - {CaVars,ScopeSub} = pattern_list(CaVars0, ScopeSub0), - G = guard(G0, ScopeSub), - - B1 = body(B0, ScopeSub), - - {Lvs,B2,Sub1} = let_substs(Lvs0, B1, Sub0), - Sub2 = Sub1#sub{s=cerl_sets:union(ScopeSub#sub.s, - Sub1#sub.s)}, - Lbody = body(Lbody0, Sub2), - B = Let#c_let{vars=Lvs,arg=core_lib:make_values(B2),body=Lbody}, - - Ca = Ca0#c_clause{pats=CaVars,guard=G,body=B}, - Cb = clause(Cb0, Cexpr, value, Sub0), - Case#c_case{arg=Cexpr,clauses=[Ca,Cb]}; + try pattern_list(CaPats0, ScopeSub0) of + {CaPats,ScopeSub} -> + G = guard(G0, ScopeSub), + + B1 = body(B0, ScopeSub), + + {Lvs,B2,Sub1} = let_substs(Lvs0, B1, Sub0), + Sub2 = Sub1#sub{s=cerl_sets:union(ScopeSub#sub.s, + Sub1#sub.s)}, + Lbody = body(Lbody0, Sub2), + B = Let#c_let{vars=Lvs, + arg=core_lib:make_values(B2), + body=Lbody}, + + Ca = Ca0#c_clause{pats=CaPats,guard=G,body=B}, + Cb = clause(Cb0, Cexpr, value, Sub0), + Case#c_case{arg=Cexpr,clauses=[Ca,Cb]} + catch + nomatch -> + %% This is not a defeat. The code will eventually + %% be optimized to erlang:error(...) by the other + %% optimizations done in this module. + impossible + end; {_,_,_} -> impossible end; move_let_into_expr(#c_let{vars=Lvs0,body=Lbody0}=Let, @@ -2638,14 +2759,18 @@ move_case_into_arg(#c_case{arg=#c_case{arg=OuterArg, %% end %% ScopeSub0 = sub_subst_scope(Sub#sub{t=#{}}), - {OuterPats,ScopeSub} = pattern_list(OuterPats0, ScopeSub0), - OuterGuard = guard(OuterGuard0, ScopeSub), - InnerArg = body(InnerArg0, ScopeSub), - Inner = Inner0#c_case{arg=InnerArg,clauses=InnerClauses}, - OuterCa = OuterCa0#c_clause{pats=OuterPats,guard=OuterGuard, - body=Inner}, - Outer#c_case{arg=OuterArg, - clauses=[OuterCa,OuterCb]}; + + %% We KNOW that pattern_list/2 has already been called for OuterPats0; + %% therefore, it cannot throw an exception. + {OuterPats,ScopeSub} = pattern_list(OuterPats0, ScopeSub0), + OuterGuard = guard(OuterGuard0, ScopeSub), + InnerArg = body(InnerArg0, ScopeSub), + Inner = Inner0#c_case{arg=InnerArg,clauses=InnerClauses}, + OuterCa = OuterCa0#c_clause{pats=OuterPats, + guard=OuterGuard, + body=Inner}, + Outer#c_case{arg=OuterArg, + clauses=[OuterCa,OuterCb]}; false -> impossible end; @@ -3227,6 +3352,29 @@ format_error(nomatch_shadow) -> "this clause cannot match because a previous clause always matches"; format_error(nomatch_guard) -> "the guard for this clause evaluates to 'false'"; +format_error({nomatch_bit_syntax_truncated,Signess,Val,Sz}) -> + S = case Signess of + signed -> "a 'signed'"; + unsigned -> "an 'unsigned'" + end, + F = "this clause cannot match because the value ~P" + " will not fit in ~s binary segment of size ~p", + flatten(io_lib:format(F, [Val,10,S,Sz])); +format_error({nomatch_bit_syntax_unsigned,Val}) -> + F = "this clause cannot match because the negative value ~P" + " will never match the value of an 'unsigned' binary segment", + flatten(io_lib:format(F, [Val,10])); +format_error({nomatch_bit_syntax_size,Sz}) -> + F = "this clause cannot match because '~P' is not a valid size for a binary segment", + flatten(io_lib:format(F, [Sz,10])); +format_error({nomatch_bit_syntax_type,Val,Type}) -> + F = "this clause cannot match because '~P' is not of the" + " expected type '~p'", + flatten(io_lib:format(F, [Val,10,Type])); +format_error({nomatch_bit_syntax_unicode,Val}) -> + F = "this clause cannot match because the value ~p" + " is not a valid Unicode code point", + flatten(io_lib:format(F, [Val])); format_error(no_clause_match) -> "no clause will ever match"; format_error(nomatch_clause_type) -> -- cgit v1.2.3