diff options
author | Björn Gustavsson <[email protected]> | 2016-02-22 07:17:29 +0100 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2016-02-25 10:27:58 +0100 |
commit | 0d73923519dc9972a1cf31a5927692f51dbd7d8b (patch) | |
tree | 46dac92b89ada65857601c2619c61c1c025302c4 | |
parent | b9f0b1259da6a26982855f490ba31a0031fca247 (diff) | |
download | otp-0d73923519dc9972a1cf31a5927692f51dbd7d8b.tar.gz otp-0d73923519dc9972a1cf31a5927692f51dbd7d8b.tar.bz2 otp-0d73923519dc9972a1cf31a5927692f51dbd7d8b.zip |
Produce warnings for binary patterns that will never match
Binary matching can be confusing. For example:
1> <<-1>> = <<-1>>.
** exception error: no match of right hand side value <<"ÿ">>
2>
When constructing binaries, the value will be masked to fit in
the binary segment. But no such masking happens when matching
binaries.
One solution that we considered was to do the same masking when
matching. We have rejected that solution for several reasons:
* Masking in construction is highly controversial and by some
people considered a bad design decision.
* While masking of unsigned numbers can be understood, masking of
signed numbers it not easy to understand.
* Then there is the question of backward compatibility. Adding
masking to matching would mean that clauses that did not match
earlier would start to match. That means that code that has
never been tested will be executed. Code that has not been
tested will usually not work.
Therefore, we have decided to warn for binary patterns that cannot
possibly match.
While we are it, we will also warn for the following example where
size for a binary segment is invalid:
bad_size(Bin) ->
BadSize = bad_size,
<<42:BadSize>> = Bin.
That example would crash the HiPE compiler because the BEAM compiler
would generate a bs_get_integer2 instruction with an invalid size
field. We can avoid that crash if sys_core_fold not only warns for bad
binary pattern, but also removes the clauses that will not match.
Reported-by: http://bugs.erlang.org/browse/ERL-44
Reported-by: Kostis Sagonas
-rw-r--r-- | lib/compiler/src/sys_core_fold.erl | 210 | ||||
-rw-r--r-- | lib/compiler/test/bs_match_SUITE.erl | 97 | ||||
-rw-r--r-- | lib/compiler/test/warnings_SUITE.erl | 49 |
3 files changed, 321 insertions, 35 deletions
diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl index e134a37f22..ab67c8164b 100644 --- a/lib/compiler/src/sys_core_fold.erl +++ b/lib/compiler/src/sys_core_fold.erl @@ -1078,8 +1078,17 @@ is_atom_or_var(_) -> false. %% clause(Clause, Cepxr, Context, Sub) -> Clause. -clause(#c_clause{pats=Ps0,guard=G0,body=B0}=Cl, Cexpr, Ctxt, Sub0) -> - {Ps1,Sub1} = pattern_list(Ps0, Sub0), +clause(#c_clause{pats=Ps0}=Cl, Cexpr, Ctxt, Sub0) -> + try pattern_list(Ps0, Sub0) of + {Ps1,Sub1} -> + clause_1(Cl, Ps1, Cexpr, Ctxt, Sub1) + catch + nomatch -> + Cl#c_clause{anno=[compiler_generated], + guard=#c_literal{val=false}} + end. + +clause_1(#c_clause{guard=G0,body=B0}=Cl, Ps1, Cexpr, Ctxt, Sub1) -> Sub2 = update_types(Cexpr, Ps1, Sub1), GSub = case {Cexpr,Ps1,G0} of {_,_,#c_literal{}} -> @@ -1210,14 +1219,16 @@ bin_pattern_list(Ps0, Isub, Osub0) -> {Ps,{_,Osub}} = mapfoldl(fun bin_pattern/2, {Isub,Osub0}, Ps0), {Ps,Osub}. -bin_pattern(#c_bitstr{val=E0,size=Size0}=Pat, {Isub0,Osub0}) -> +bin_pattern(#c_bitstr{val=E0,size=Size0}=Pat0, {Isub0,Osub0}) -> Size1 = expr(Size0, Isub0), {E1,Osub} = pattern(E0, Isub0, Osub0), Isub = case E0 of #c_var{} -> sub_set_var(E0, E1, Isub0); _ -> Isub0 end, - {Pat#c_bitstr{val=E1,size=Size1},{Isub,Osub}}. + Pat = Pat0#c_bitstr{val=E1,size=Size1}, + bin_pat_warn(Pat), + {Pat,{Isub,Osub}}. pattern_list(Ps, Sub) -> pattern_list(Ps, Sub, Sub). @@ -1234,6 +1245,106 @@ var_list(Vs, Sub0) -> end, Sub0, Vs). +%%% +%%% Generate warnings for binary patterns that will not match. +%%% + +bin_pat_warn(#c_bitstr{type=#c_literal{val=Type}, + val=Val0, + size=#c_literal{val=Sz}, + unit=#c_literal{val=Unit}, + flags=Fl}=Pat) -> + case {Type,Sz} of + {_,_} when is_integer(Sz), Sz >= 0 -> ok; + {binary,all} -> ok; + {utf8,undefined} -> ok; + {utf16,undefined} -> ok; + {utf32,undefined} -> ok; + {_,_} -> + add_warning(Pat, {nomatch_bit_syntax_size,Sz}), + throw(nomatch) + end, + case {Type,Val0} of + {integer,#c_literal{val=Val}} when is_integer(Val) -> + Signedness = signedness(Fl), + TotalSz = Sz * Unit, + bit_pat_warn_int(Val, TotalSz, Signedness, Pat); + {float,#c_literal{val=Val}} when is_float(Val) -> + ok; + {utf8,#c_literal{val=Val}} when is_integer(Val) -> + bit_pat_warn_unicode(Val, Pat); + {utf16,#c_literal{val=Val}} when is_integer(Val) -> + bit_pat_warn_unicode(Val, Pat); + {utf32,#c_literal{val=Val}} when is_integer(Val) -> + bit_pat_warn_unicode(Val, Pat); + {_,#c_literal{val=Val}} -> + add_warning(Pat, {nomatch_bit_syntax_type,Val,Type}), + throw(nomatch); + {_,_} -> + ok + end; +bin_pat_warn(#c_bitstr{type=#c_literal{val=Type},val=Val0,flags=Fl}=Pat) -> + %% Size is variable. Not much that we can check. + case {Type,Val0} of + {integer,#c_literal{val=Val}} when is_integer(Val) -> + case signedness(Fl) of + unsigned when Val < 0 -> + add_warning(Pat, {nomatch_bit_syntax_unsigned,Val}), + throw(nomatch); + _ -> + ok + end; + {float,#c_literal{val=Val}} when is_float(Val) -> + ok; + {_,#c_literal{val=Val}} -> + add_warning(Pat, {nomatch_bit_syntax_type,Val,Type}), + throw(nomatch); + {_,_} -> + ok + end. + +bit_pat_warn_int(Val, 0, signed, Pat) -> + if + Val =:= 0 -> + ok; + true -> + add_warning(Pat, {nomatch_bit_syntax_truncated,signed,Val,0}), + throw(nomatch) + end; +bit_pat_warn_int(Val, Sz, signed, Pat) -> + if + Val < 0, Val bsr (Sz - 1) =/= -1 -> + add_warning(Pat, {nomatch_bit_syntax_truncated,signed,Val,Sz}), + throw(nomatch); + Val > 0, Val bsr (Sz - 1) =/= 0 -> + add_warning(Pat, {nomatch_bit_syntax_truncated,signed,Val,Sz}), + throw(nomatch); + true -> + ok + end; +bit_pat_warn_int(Val, _Sz, unsigned, Pat) when Val < 0 -> + add_warning(Pat, {nomatch_bit_syntax_unsigned,Val}), + throw(nomatch); +bit_pat_warn_int(Val, Sz, unsigned, Pat) -> + if + Val bsr Sz =:= 0 -> + ok; + true -> + add_warning(Pat, {nomatch_bit_syntax_truncated,unsigned,Val,Sz}), + throw(nomatch) + end. + +bit_pat_warn_unicode(U, _Pat) when 0 =< U, U =< 16#10FFFF -> + ok; +bit_pat_warn_unicode(U, Pat) -> + add_warning(Pat, {nomatch_bit_syntax_unicode,U}), + throw(nomatch). + +signedness(#c_literal{val=Flags}) -> + [S] = [F || F <- Flags, F =:= signed orelse F =:= unsigned], + S. + + %% is_subst(Expr) -> true | false. %% Test whether an expression is a suitable substitution. @@ -2285,39 +2396,49 @@ move_let_into_expr(#c_let{vars=Lvs0,body=Lbody0}=Let, case {TwoClauses,is_failing_clause(Ca0),is_failing_clause(Cb0)} of {true,false,true} -> %% let <Lvars> = case <Case-expr> of - %% <Cvars> -> <Clause-body>; - %% <OtherCvars> -> erlang:error(...) + %% <Cpats> -> <Clause-body>; + %% <OtherCpats> -> erlang:error(...) %% end %% in <Let-body> %% %% ==> %% %% case <Case-expr> of - %% <Cvars> -> + %% <Cpats> -> %% let <Lvars> = <Clause-body> %% in <Let-body>; - %% <OtherCvars> -> erlang:error(...) + %% <OtherCpats> -> erlang:error(...) %% end Cexpr = body(Cexpr0, Sub0), - CaVars0 = Ca0#c_clause.pats, + CaPats0 = Ca0#c_clause.pats, G0 = Ca0#c_clause.guard, B0 = Ca0#c_clause.body, ScopeSub0 = sub_subst_scope(Sub0#sub{t=#{}}), - {CaVars,ScopeSub} = pattern_list(CaVars0, ScopeSub0), - G = guard(G0, ScopeSub), - - B1 = body(B0, ScopeSub), - - {Lvs,B2,Sub1} = let_substs(Lvs0, B1, Sub0), - Sub2 = Sub1#sub{s=cerl_sets:union(ScopeSub#sub.s, - Sub1#sub.s)}, - Lbody = body(Lbody0, Sub2), - B = Let#c_let{vars=Lvs,arg=core_lib:make_values(B2),body=Lbody}, - - Ca = Ca0#c_clause{pats=CaVars,guard=G,body=B}, - Cb = clause(Cb0, Cexpr, value, Sub0), - Case#c_case{arg=Cexpr,clauses=[Ca,Cb]}; + try pattern_list(CaPats0, ScopeSub0) of + {CaPats,ScopeSub} -> + G = guard(G0, ScopeSub), + + B1 = body(B0, ScopeSub), + + {Lvs,B2,Sub1} = let_substs(Lvs0, B1, Sub0), + Sub2 = Sub1#sub{s=cerl_sets:union(ScopeSub#sub.s, + Sub1#sub.s)}, + Lbody = body(Lbody0, Sub2), + B = Let#c_let{vars=Lvs, + arg=core_lib:make_values(B2), + body=Lbody}, + + Ca = Ca0#c_clause{pats=CaPats,guard=G,body=B}, + Cb = clause(Cb0, Cexpr, value, Sub0), + Case#c_case{arg=Cexpr,clauses=[Ca,Cb]} + catch + nomatch -> + %% This is not a defeat. The code will eventually + %% be optimized to erlang:error(...) by the other + %% optimizations done in this module. + impossible + end; {_,_,_} -> impossible end; move_let_into_expr(#c_let{vars=Lvs0,body=Lbody0}=Let, @@ -2638,14 +2759,18 @@ move_case_into_arg(#c_case{arg=#c_case{arg=OuterArg, %% end %% ScopeSub0 = sub_subst_scope(Sub#sub{t=#{}}), - {OuterPats,ScopeSub} = pattern_list(OuterPats0, ScopeSub0), - OuterGuard = guard(OuterGuard0, ScopeSub), - InnerArg = body(InnerArg0, ScopeSub), - Inner = Inner0#c_case{arg=InnerArg,clauses=InnerClauses}, - OuterCa = OuterCa0#c_clause{pats=OuterPats,guard=OuterGuard, - body=Inner}, - Outer#c_case{arg=OuterArg, - clauses=[OuterCa,OuterCb]}; + + %% We KNOW that pattern_list/2 has already been called for OuterPats0; + %% therefore, it cannot throw an exception. + {OuterPats,ScopeSub} = pattern_list(OuterPats0, ScopeSub0), + OuterGuard = guard(OuterGuard0, ScopeSub), + InnerArg = body(InnerArg0, ScopeSub), + Inner = Inner0#c_case{arg=InnerArg,clauses=InnerClauses}, + OuterCa = OuterCa0#c_clause{pats=OuterPats, + guard=OuterGuard, + body=Inner}, + Outer#c_case{arg=OuterArg, + clauses=[OuterCa,OuterCb]}; false -> impossible end; @@ -3227,6 +3352,29 @@ format_error(nomatch_shadow) -> "this clause cannot match because a previous clause always matches"; format_error(nomatch_guard) -> "the guard for this clause evaluates to 'false'"; +format_error({nomatch_bit_syntax_truncated,Signess,Val,Sz}) -> + S = case Signess of + signed -> "a 'signed'"; + unsigned -> "an 'unsigned'" + end, + F = "this clause cannot match because the value ~P" + " will not fit in ~s binary segment of size ~p", + flatten(io_lib:format(F, [Val,10,S,Sz])); +format_error({nomatch_bit_syntax_unsigned,Val}) -> + F = "this clause cannot match because the negative value ~P" + " will never match the value of an 'unsigned' binary segment", + flatten(io_lib:format(F, [Val,10])); +format_error({nomatch_bit_syntax_size,Sz}) -> + F = "this clause cannot match because '~P' is not a valid size for a binary segment", + flatten(io_lib:format(F, [Sz,10])); +format_error({nomatch_bit_syntax_type,Val,Type}) -> + F = "this clause cannot match because '~P' is not of the" + " expected type '~p'", + flatten(io_lib:format(F, [Val,10,Type])); +format_error({nomatch_bit_syntax_unicode,Val}) -> + F = "this clause cannot match because the value ~p" + " is not a valid Unicode code point", + flatten(io_lib:format(F, [Val])); format_error(no_clause_match) -> "no clause will ever match"; format_error(nomatch_clause_type) -> diff --git a/lib/compiler/test/bs_match_SUITE.erl b/lib/compiler/test/bs_match_SUITE.erl index 99539f3779..b79feb1393 100644 --- a/lib/compiler/test/bs_match_SUITE.erl +++ b/lib/compiler/test/bs_match_SUITE.erl @@ -37,11 +37,13 @@ cover_beam_bool/1,matched_out_size/1,follow_fail_branch/1, no_partition/1,calling_a_binary/1,binary_in_map/1, match_string_opt/1,select_on_integer/1, - map_and_binary/1,unsafe_branch_caching/1]). + map_and_binary/1,unsafe_branch_caching/1, + bad_literals/1,good_literals/1]). -export([coverage_id/1,coverage_external_ignore/2]). -include_lib("common_test/include/ct.hrl"). +-include_lib("syntax_tools/include/merl.hrl"). suite() -> [{ct_hooks,[ts_install_cth]}]. @@ -64,7 +66,8 @@ groups() -> cover_beam_bool,matched_out_size,follow_fail_branch, no_partition,calling_a_binary,binary_in_map, match_string_opt,select_on_integer, - map_and_binary,unsafe_branch_caching]}]. + map_and_binary,unsafe_branch_caching, + bad_literals,good_literals]}]. init_per_suite(Config) -> @@ -1293,6 +1296,96 @@ do_unsafe_branch_caching(<<Code/integer, Bin/binary>>) -> _ -> Bin2 end. +bad_literals(_Config) -> + Mod = list_to_atom(?MODULE_STRING ++ "_" ++ + atom_to_list(?FUNCTION_NAME)), + S = [signed_lit_match(V, Sz) || V <- lists:seq(-8, 8), + Sz <- [0,1,2,3]] ++ + [unsigned_lit_match(V, Sz) || V <- lists:seq(-2, 8), + Sz <- [0,1,2]] ++ + [unicode_match(V) || + V <- [-100,-1,0,1,2|lists:seq(16#10FFFC, 16#110004)]], + Code = ?Q(["-module('@Mod@').\n" + "-export([f/0]).\n" + "f() ->\n" + "_@S,\n" + "ok.\n"]), + merl:print(Code), + Opts = test_lib:opt_opts(?MODULE), + {ok,_} = merl:compile_and_load(Code, Opts), + Mod:f(), + + {'EXIT',<<42>>} = (catch bad_literals_1()), + + Sz = id(8), + {'EXIT',{{badmatch,_},_}} = (catch <<-1:Sz>> = <<-1>>), + ok. + +bad_literals_1() -> + BadSz = bad, + case case <<42>> of + <<42:BadSz>> -> ok; + Val -> exit(Val) + end of + ok -> ok; + error -> error + end. + +signed_lit_match(V, Sz) -> + case <<V:Sz>> of + <<V:Sz/signed>> -> + ?Q("<<_@V@:_@Sz@/signed>> = <<_@V@:_@Sz@>>"); + _ -> + ?Q(["case <<_@V@:_@Sz@>> of\n", + " <<_@V@:_@Sz@/signed>> ->\n", + " ct:fail(should_not_match);\n", + " _ ->\n", + " ok\n", + "end\n"]) + end. + +unsigned_lit_match(V, Sz) -> + case <<V:Sz>> of + <<V:Sz/unsigned>> -> + ?Q("<<_@V@:_@Sz@>> = <<_@V@:_@Sz@>>"); + _ -> + ?Q(["case <<_@V@:_@Sz@>> of\n", + " <<_@V@:_@Sz@/unsigned>> ->\n", + " ct:fail(should_not_match);\n", + " _ ->\n", + " ok\n", + "end\n"]) + end. + +unicode_match(V) -> + try <<V/utf8>> of + <<V/utf8>> -> + ?Q(["<<_@V@/utf8>> = <<_@V@/utf8>>,\n", + "<<_@V@/utf16>> = <<_@V@/utf16>>,\n", + "<<_@V@/utf32>> = <<_@V@/utf32>>\n"]) + catch + error:badarg -> + ?Q(["case <<_@V@:32>> of\n", + " <<_@V@/utf32>> ->\n", + " ct:fail(should_not_match);\n", + " _ ->\n", + " ok\n", + "end\n"]) + end. + +%% Test a few legal but rare cases. + +good_literals(_Config) -> + Sz = id(64), + + %% Variable size. + <<42:Sz>> = id(<<42:Sz>>), + <<42.0:Sz/float>> = id(<<42:Sz/float>>), + + %% unit > 1 + <<16#cafebeef:4/unit:8>> = id(<<16#cafebeef:32>>), + ok. + check(F, R) -> R = F(). diff --git a/lib/compiler/test/warnings_SUITE.erl b/lib/compiler/test/warnings_SUITE.erl index 2a425c2ae5..df5f6760e4 100644 --- a/lib/compiler/test/warnings_SUITE.erl +++ b/lib/compiler/test/warnings_SUITE.erl @@ -41,7 +41,8 @@ files/1,effect/1,bin_opt_info/1,bin_construction/1, comprehensions/1,maps/1,maps_bin_opt_info/1, redundant_boolean_clauses/1, - latin1_fallback/1,underscore/1,no_warnings/1]). + latin1_fallback/1,underscore/1,no_warnings/1, + bit_syntax/1]). % Default timetrap timeout (set in init_per_testcase). -define(default_timeout, ?t:minutes(2)). @@ -68,7 +69,7 @@ groups() -> bin_opt_info,bin_construction,comprehensions,maps, maps_bin_opt_info, redundant_boolean_clauses,latin1_fallback, - underscore,no_warnings]}]. + underscore,no_warnings,bit_syntax]}]. init_per_suite(Config) -> Config. @@ -783,6 +784,50 @@ no_warnings(Config) when is_list(Config) -> run(Config, Ts), ok. +bit_syntax(Config) -> + Ts = [{?FUNCTION_NAME, + <<"a(<<-1>>) -> ok; + a(<<1023>>) -> ok; + a(<<777/signed>>) -> ok; + a(<<a/binary>>) -> ok; + a(<<a/integer>>) -> ok; + a(<<a/float>>) -> ok; + a(<<a/utf8>>) -> ok; + a(<<a/utf16>>) -> ok; + a(<<a/utf32>>) -> ok; + a(<<a/utf32>>) -> ok. + b(Bin) -> Sz = bad, <<42:Sz>> = Bin. + c(Sz, Bin) -> + case Bin of + <<-42:Sz/unsigned>> -> ok; + <<42:Sz/float>> -> ok; + <<42:Sz/binary>> -> ok + end. + ">>, + [], + {warnings,[{1,sys_core_fold,no_clause_match}, + {1,sys_core_fold,{nomatch_bit_syntax_unsigned,-1}}, + {2,sys_core_fold,{nomatch_bit_syntax_truncated, + unsigned,1023,8}}, + {3,sys_core_fold,{nomatch_bit_syntax_truncated, + signed,777,8}}, + {4,sys_core_fold,{nomatch_bit_syntax_type,a,binary}}, + {5,sys_core_fold,{nomatch_bit_syntax_type,a,integer}}, + {6,sys_core_fold,{nomatch_bit_syntax_type,a,float}}, + {7,sys_core_fold,{nomatch_bit_syntax_type,a,utf8}}, + {8,sys_core_fold,{nomatch_bit_syntax_type,a,utf16}}, + {9,sys_core_fold,{nomatch_bit_syntax_type,a,utf32}}, + {10,sys_core_fold,{nomatch_bit_syntax_type,a,utf32}}, + {11,sys_core_fold,no_clause_match}, + {11,sys_core_fold,{nomatch_bit_syntax_size,bad}}, + {14,sys_core_fold,{nomatch_bit_syntax_unsigned,-42}}, + {16,sys_core_fold,{nomatch_bit_syntax_type,42,binary}} + ]} + }], + run(Config, Ts), + ok. + + %%% %%% End of test cases. %%% |