aboutsummaryrefslogtreecommitdiffstats
path: root/lib/compiler
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2016-02-22 07:17:29 +0100
committerBjörn Gustavsson <[email protected]>2016-02-25 10:27:58 +0100
commit0d73923519dc9972a1cf31a5927692f51dbd7d8b (patch)
tree46dac92b89ada65857601c2619c61c1c025302c4 /lib/compiler
parentb9f0b1259da6a26982855f490ba31a0031fca247 (diff)
downloadotp-0d73923519dc9972a1cf31a5927692f51dbd7d8b.tar.gz
otp-0d73923519dc9972a1cf31a5927692f51dbd7d8b.tar.bz2
otp-0d73923519dc9972a1cf31a5927692f51dbd7d8b.zip
Produce warnings for binary patterns that will never match
Binary matching can be confusing. For example: 1> <<-1>> = <<-1>>. ** exception error: no match of right hand side value <<"ÿ">> 2> When constructing binaries, the value will be masked to fit in the binary segment. But no such masking happens when matching binaries. One solution that we considered was to do the same masking when matching. We have rejected that solution for several reasons: * Masking in construction is highly controversial and by some people considered a bad design decision. * While masking of unsigned numbers can be understood, masking of signed numbers it not easy to understand. * Then there is the question of backward compatibility. Adding masking to matching would mean that clauses that did not match earlier would start to match. That means that code that has never been tested will be executed. Code that has not been tested will usually not work. Therefore, we have decided to warn for binary patterns that cannot possibly match. While we are it, we will also warn for the following example where size for a binary segment is invalid: bad_size(Bin) -> BadSize = bad_size, <<42:BadSize>> = Bin. That example would crash the HiPE compiler because the BEAM compiler would generate a bs_get_integer2 instruction with an invalid size field. We can avoid that crash if sys_core_fold not only warns for bad binary pattern, but also removes the clauses that will not match. Reported-by: http://bugs.erlang.org/browse/ERL-44 Reported-by: Kostis Sagonas
Diffstat (limited to 'lib/compiler')
-rw-r--r--lib/compiler/src/sys_core_fold.erl210
-rw-r--r--lib/compiler/test/bs_match_SUITE.erl97
-rw-r--r--lib/compiler/test/warnings_SUITE.erl49
3 files changed, 321 insertions, 35 deletions
diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl
index e134a37f22..ab67c8164b 100644
--- a/lib/compiler/src/sys_core_fold.erl
+++ b/lib/compiler/src/sys_core_fold.erl
@@ -1078,8 +1078,17 @@ is_atom_or_var(_) -> false.
%% clause(Clause, Cepxr, Context, Sub) -> Clause.
-clause(#c_clause{pats=Ps0,guard=G0,body=B0}=Cl, Cexpr, Ctxt, Sub0) ->
- {Ps1,Sub1} = pattern_list(Ps0, Sub0),
+clause(#c_clause{pats=Ps0}=Cl, Cexpr, Ctxt, Sub0) ->
+ try pattern_list(Ps0, Sub0) of
+ {Ps1,Sub1} ->
+ clause_1(Cl, Ps1, Cexpr, Ctxt, Sub1)
+ catch
+ nomatch ->
+ Cl#c_clause{anno=[compiler_generated],
+ guard=#c_literal{val=false}}
+ end.
+
+clause_1(#c_clause{guard=G0,body=B0}=Cl, Ps1, Cexpr, Ctxt, Sub1) ->
Sub2 = update_types(Cexpr, Ps1, Sub1),
GSub = case {Cexpr,Ps1,G0} of
{_,_,#c_literal{}} ->
@@ -1210,14 +1219,16 @@ bin_pattern_list(Ps0, Isub, Osub0) ->
{Ps,{_,Osub}} = mapfoldl(fun bin_pattern/2, {Isub,Osub0}, Ps0),
{Ps,Osub}.
-bin_pattern(#c_bitstr{val=E0,size=Size0}=Pat, {Isub0,Osub0}) ->
+bin_pattern(#c_bitstr{val=E0,size=Size0}=Pat0, {Isub0,Osub0}) ->
Size1 = expr(Size0, Isub0),
{E1,Osub} = pattern(E0, Isub0, Osub0),
Isub = case E0 of
#c_var{} -> sub_set_var(E0, E1, Isub0);
_ -> Isub0
end,
- {Pat#c_bitstr{val=E1,size=Size1},{Isub,Osub}}.
+ Pat = Pat0#c_bitstr{val=E1,size=Size1},
+ bin_pat_warn(Pat),
+ {Pat,{Isub,Osub}}.
pattern_list(Ps, Sub) -> pattern_list(Ps, Sub, Sub).
@@ -1234,6 +1245,106 @@ var_list(Vs, Sub0) ->
end, Sub0, Vs).
+%%%
+%%% Generate warnings for binary patterns that will not match.
+%%%
+
+bin_pat_warn(#c_bitstr{type=#c_literal{val=Type},
+ val=Val0,
+ size=#c_literal{val=Sz},
+ unit=#c_literal{val=Unit},
+ flags=Fl}=Pat) ->
+ case {Type,Sz} of
+ {_,_} when is_integer(Sz), Sz >= 0 -> ok;
+ {binary,all} -> ok;
+ {utf8,undefined} -> ok;
+ {utf16,undefined} -> ok;
+ {utf32,undefined} -> ok;
+ {_,_} ->
+ add_warning(Pat, {nomatch_bit_syntax_size,Sz}),
+ throw(nomatch)
+ end,
+ case {Type,Val0} of
+ {integer,#c_literal{val=Val}} when is_integer(Val) ->
+ Signedness = signedness(Fl),
+ TotalSz = Sz * Unit,
+ bit_pat_warn_int(Val, TotalSz, Signedness, Pat);
+ {float,#c_literal{val=Val}} when is_float(Val) ->
+ ok;
+ {utf8,#c_literal{val=Val}} when is_integer(Val) ->
+ bit_pat_warn_unicode(Val, Pat);
+ {utf16,#c_literal{val=Val}} when is_integer(Val) ->
+ bit_pat_warn_unicode(Val, Pat);
+ {utf32,#c_literal{val=Val}} when is_integer(Val) ->
+ bit_pat_warn_unicode(Val, Pat);
+ {_,#c_literal{val=Val}} ->
+ add_warning(Pat, {nomatch_bit_syntax_type,Val,Type}),
+ throw(nomatch);
+ {_,_} ->
+ ok
+ end;
+bin_pat_warn(#c_bitstr{type=#c_literal{val=Type},val=Val0,flags=Fl}=Pat) ->
+ %% Size is variable. Not much that we can check.
+ case {Type,Val0} of
+ {integer,#c_literal{val=Val}} when is_integer(Val) ->
+ case signedness(Fl) of
+ unsigned when Val < 0 ->
+ add_warning(Pat, {nomatch_bit_syntax_unsigned,Val}),
+ throw(nomatch);
+ _ ->
+ ok
+ end;
+ {float,#c_literal{val=Val}} when is_float(Val) ->
+ ok;
+ {_,#c_literal{val=Val}} ->
+ add_warning(Pat, {nomatch_bit_syntax_type,Val,Type}),
+ throw(nomatch);
+ {_,_} ->
+ ok
+ end.
+
+bit_pat_warn_int(Val, 0, signed, Pat) ->
+ if
+ Val =:= 0 ->
+ ok;
+ true ->
+ add_warning(Pat, {nomatch_bit_syntax_truncated,signed,Val,0}),
+ throw(nomatch)
+ end;
+bit_pat_warn_int(Val, Sz, signed, Pat) ->
+ if
+ Val < 0, Val bsr (Sz - 1) =/= -1 ->
+ add_warning(Pat, {nomatch_bit_syntax_truncated,signed,Val,Sz}),
+ throw(nomatch);
+ Val > 0, Val bsr (Sz - 1) =/= 0 ->
+ add_warning(Pat, {nomatch_bit_syntax_truncated,signed,Val,Sz}),
+ throw(nomatch);
+ true ->
+ ok
+ end;
+bit_pat_warn_int(Val, _Sz, unsigned, Pat) when Val < 0 ->
+ add_warning(Pat, {nomatch_bit_syntax_unsigned,Val}),
+ throw(nomatch);
+bit_pat_warn_int(Val, Sz, unsigned, Pat) ->
+ if
+ Val bsr Sz =:= 0 ->
+ ok;
+ true ->
+ add_warning(Pat, {nomatch_bit_syntax_truncated,unsigned,Val,Sz}),
+ throw(nomatch)
+ end.
+
+bit_pat_warn_unicode(U, _Pat) when 0 =< U, U =< 16#10FFFF ->
+ ok;
+bit_pat_warn_unicode(U, Pat) ->
+ add_warning(Pat, {nomatch_bit_syntax_unicode,U}),
+ throw(nomatch).
+
+signedness(#c_literal{val=Flags}) ->
+ [S] = [F || F <- Flags, F =:= signed orelse F =:= unsigned],
+ S.
+
+
%% is_subst(Expr) -> true | false.
%% Test whether an expression is a suitable substitution.
@@ -2285,39 +2396,49 @@ move_let_into_expr(#c_let{vars=Lvs0,body=Lbody0}=Let,
case {TwoClauses,is_failing_clause(Ca0),is_failing_clause(Cb0)} of
{true,false,true} ->
%% let <Lvars> = case <Case-expr> of
- %% <Cvars> -> <Clause-body>;
- %% <OtherCvars> -> erlang:error(...)
+ %% <Cpats> -> <Clause-body>;
+ %% <OtherCpats> -> erlang:error(...)
%% end
%% in <Let-body>
%%
%% ==>
%%
%% case <Case-expr> of
- %% <Cvars> ->
+ %% <Cpats> ->
%% let <Lvars> = <Clause-body>
%% in <Let-body>;
- %% <OtherCvars> -> erlang:error(...)
+ %% <OtherCpats> -> erlang:error(...)
%% end
Cexpr = body(Cexpr0, Sub0),
- CaVars0 = Ca0#c_clause.pats,
+ CaPats0 = Ca0#c_clause.pats,
G0 = Ca0#c_clause.guard,
B0 = Ca0#c_clause.body,
ScopeSub0 = sub_subst_scope(Sub0#sub{t=#{}}),
- {CaVars,ScopeSub} = pattern_list(CaVars0, ScopeSub0),
- G = guard(G0, ScopeSub),
-
- B1 = body(B0, ScopeSub),
-
- {Lvs,B2,Sub1} = let_substs(Lvs0, B1, Sub0),
- Sub2 = Sub1#sub{s=cerl_sets:union(ScopeSub#sub.s,
- Sub1#sub.s)},
- Lbody = body(Lbody0, Sub2),
- B = Let#c_let{vars=Lvs,arg=core_lib:make_values(B2),body=Lbody},
-
- Ca = Ca0#c_clause{pats=CaVars,guard=G,body=B},
- Cb = clause(Cb0, Cexpr, value, Sub0),
- Case#c_case{arg=Cexpr,clauses=[Ca,Cb]};
+ try pattern_list(CaPats0, ScopeSub0) of
+ {CaPats,ScopeSub} ->
+ G = guard(G0, ScopeSub),
+
+ B1 = body(B0, ScopeSub),
+
+ {Lvs,B2,Sub1} = let_substs(Lvs0, B1, Sub0),
+ Sub2 = Sub1#sub{s=cerl_sets:union(ScopeSub#sub.s,
+ Sub1#sub.s)},
+ Lbody = body(Lbody0, Sub2),
+ B = Let#c_let{vars=Lvs,
+ arg=core_lib:make_values(B2),
+ body=Lbody},
+
+ Ca = Ca0#c_clause{pats=CaPats,guard=G,body=B},
+ Cb = clause(Cb0, Cexpr, value, Sub0),
+ Case#c_case{arg=Cexpr,clauses=[Ca,Cb]}
+ catch
+ nomatch ->
+ %% This is not a defeat. The code will eventually
+ %% be optimized to erlang:error(...) by the other
+ %% optimizations done in this module.
+ impossible
+ end;
{_,_,_} -> impossible
end;
move_let_into_expr(#c_let{vars=Lvs0,body=Lbody0}=Let,
@@ -2638,14 +2759,18 @@ move_case_into_arg(#c_case{arg=#c_case{arg=OuterArg,
%% end
%%
ScopeSub0 = sub_subst_scope(Sub#sub{t=#{}}),
- {OuterPats,ScopeSub} = pattern_list(OuterPats0, ScopeSub0),
- OuterGuard = guard(OuterGuard0, ScopeSub),
- InnerArg = body(InnerArg0, ScopeSub),
- Inner = Inner0#c_case{arg=InnerArg,clauses=InnerClauses},
- OuterCa = OuterCa0#c_clause{pats=OuterPats,guard=OuterGuard,
- body=Inner},
- Outer#c_case{arg=OuterArg,
- clauses=[OuterCa,OuterCb]};
+
+ %% We KNOW that pattern_list/2 has already been called for OuterPats0;
+ %% therefore, it cannot throw an exception.
+ {OuterPats,ScopeSub} = pattern_list(OuterPats0, ScopeSub0),
+ OuterGuard = guard(OuterGuard0, ScopeSub),
+ InnerArg = body(InnerArg0, ScopeSub),
+ Inner = Inner0#c_case{arg=InnerArg,clauses=InnerClauses},
+ OuterCa = OuterCa0#c_clause{pats=OuterPats,
+ guard=OuterGuard,
+ body=Inner},
+ Outer#c_case{arg=OuterArg,
+ clauses=[OuterCa,OuterCb]};
false ->
impossible
end;
@@ -3227,6 +3352,29 @@ format_error(nomatch_shadow) ->
"this clause cannot match because a previous clause always matches";
format_error(nomatch_guard) ->
"the guard for this clause evaluates to 'false'";
+format_error({nomatch_bit_syntax_truncated,Signess,Val,Sz}) ->
+ S = case Signess of
+ signed -> "a 'signed'";
+ unsigned -> "an 'unsigned'"
+ end,
+ F = "this clause cannot match because the value ~P"
+ " will not fit in ~s binary segment of size ~p",
+ flatten(io_lib:format(F, [Val,10,S,Sz]));
+format_error({nomatch_bit_syntax_unsigned,Val}) ->
+ F = "this clause cannot match because the negative value ~P"
+ " will never match the value of an 'unsigned' binary segment",
+ flatten(io_lib:format(F, [Val,10]));
+format_error({nomatch_bit_syntax_size,Sz}) ->
+ F = "this clause cannot match because '~P' is not a valid size for a binary segment",
+ flatten(io_lib:format(F, [Sz,10]));
+format_error({nomatch_bit_syntax_type,Val,Type}) ->
+ F = "this clause cannot match because '~P' is not of the"
+ " expected type '~p'",
+ flatten(io_lib:format(F, [Val,10,Type]));
+format_error({nomatch_bit_syntax_unicode,Val}) ->
+ F = "this clause cannot match because the value ~p"
+ " is not a valid Unicode code point",
+ flatten(io_lib:format(F, [Val]));
format_error(no_clause_match) ->
"no clause will ever match";
format_error(nomatch_clause_type) ->
diff --git a/lib/compiler/test/bs_match_SUITE.erl b/lib/compiler/test/bs_match_SUITE.erl
index 99539f3779..b79feb1393 100644
--- a/lib/compiler/test/bs_match_SUITE.erl
+++ b/lib/compiler/test/bs_match_SUITE.erl
@@ -37,11 +37,13 @@
cover_beam_bool/1,matched_out_size/1,follow_fail_branch/1,
no_partition/1,calling_a_binary/1,binary_in_map/1,
match_string_opt/1,select_on_integer/1,
- map_and_binary/1,unsafe_branch_caching/1]).
+ map_and_binary/1,unsafe_branch_caching/1,
+ bad_literals/1,good_literals/1]).
-export([coverage_id/1,coverage_external_ignore/2]).
-include_lib("common_test/include/ct.hrl").
+-include_lib("syntax_tools/include/merl.hrl").
suite() -> [{ct_hooks,[ts_install_cth]}].
@@ -64,7 +66,8 @@ groups() ->
cover_beam_bool,matched_out_size,follow_fail_branch,
no_partition,calling_a_binary,binary_in_map,
match_string_opt,select_on_integer,
- map_and_binary,unsafe_branch_caching]}].
+ map_and_binary,unsafe_branch_caching,
+ bad_literals,good_literals]}].
init_per_suite(Config) ->
@@ -1293,6 +1296,96 @@ do_unsafe_branch_caching(<<Code/integer, Bin/binary>>) ->
_ -> Bin2
end.
+bad_literals(_Config) ->
+ Mod = list_to_atom(?MODULE_STRING ++ "_" ++
+ atom_to_list(?FUNCTION_NAME)),
+ S = [signed_lit_match(V, Sz) || V <- lists:seq(-8, 8),
+ Sz <- [0,1,2,3]] ++
+ [unsigned_lit_match(V, Sz) || V <- lists:seq(-2, 8),
+ Sz <- [0,1,2]] ++
+ [unicode_match(V) ||
+ V <- [-100,-1,0,1,2|lists:seq(16#10FFFC, 16#110004)]],
+ Code = ?Q(["-module('@Mod@').\n"
+ "-export([f/0]).\n"
+ "f() ->\n"
+ "_@S,\n"
+ "ok.\n"]),
+ merl:print(Code),
+ Opts = test_lib:opt_opts(?MODULE),
+ {ok,_} = merl:compile_and_load(Code, Opts),
+ Mod:f(),
+
+ {'EXIT',<<42>>} = (catch bad_literals_1()),
+
+ Sz = id(8),
+ {'EXIT',{{badmatch,_},_}} = (catch <<-1:Sz>> = <<-1>>),
+ ok.
+
+bad_literals_1() ->
+ BadSz = bad,
+ case case <<42>> of
+ <<42:BadSz>> -> ok;
+ Val -> exit(Val)
+ end of
+ ok -> ok;
+ error -> error
+ end.
+
+signed_lit_match(V, Sz) ->
+ case <<V:Sz>> of
+ <<V:Sz/signed>> ->
+ ?Q("<<_@V@:_@Sz@/signed>> = <<_@V@:_@Sz@>>");
+ _ ->
+ ?Q(["case <<_@V@:_@Sz@>> of\n",
+ " <<_@V@:_@Sz@/signed>> ->\n",
+ " ct:fail(should_not_match);\n",
+ " _ ->\n",
+ " ok\n",
+ "end\n"])
+ end.
+
+unsigned_lit_match(V, Sz) ->
+ case <<V:Sz>> of
+ <<V:Sz/unsigned>> ->
+ ?Q("<<_@V@:_@Sz@>> = <<_@V@:_@Sz@>>");
+ _ ->
+ ?Q(["case <<_@V@:_@Sz@>> of\n",
+ " <<_@V@:_@Sz@/unsigned>> ->\n",
+ " ct:fail(should_not_match);\n",
+ " _ ->\n",
+ " ok\n",
+ "end\n"])
+ end.
+
+unicode_match(V) ->
+ try <<V/utf8>> of
+ <<V/utf8>> ->
+ ?Q(["<<_@V@/utf8>> = <<_@V@/utf8>>,\n",
+ "<<_@V@/utf16>> = <<_@V@/utf16>>,\n",
+ "<<_@V@/utf32>> = <<_@V@/utf32>>\n"])
+ catch
+ error:badarg ->
+ ?Q(["case <<_@V@:32>> of\n",
+ " <<_@V@/utf32>> ->\n",
+ " ct:fail(should_not_match);\n",
+ " _ ->\n",
+ " ok\n",
+ "end\n"])
+ end.
+
+%% Test a few legal but rare cases.
+
+good_literals(_Config) ->
+ Sz = id(64),
+
+ %% Variable size.
+ <<42:Sz>> = id(<<42:Sz>>),
+ <<42.0:Sz/float>> = id(<<42:Sz/float>>),
+
+ %% unit > 1
+ <<16#cafebeef:4/unit:8>> = id(<<16#cafebeef:32>>),
+ ok.
+
check(F, R) ->
R = F().
diff --git a/lib/compiler/test/warnings_SUITE.erl b/lib/compiler/test/warnings_SUITE.erl
index 2a425c2ae5..df5f6760e4 100644
--- a/lib/compiler/test/warnings_SUITE.erl
+++ b/lib/compiler/test/warnings_SUITE.erl
@@ -41,7 +41,8 @@
files/1,effect/1,bin_opt_info/1,bin_construction/1,
comprehensions/1,maps/1,maps_bin_opt_info/1,
redundant_boolean_clauses/1,
- latin1_fallback/1,underscore/1,no_warnings/1]).
+ latin1_fallback/1,underscore/1,no_warnings/1,
+ bit_syntax/1]).
% Default timetrap timeout (set in init_per_testcase).
-define(default_timeout, ?t:minutes(2)).
@@ -68,7 +69,7 @@ groups() ->
bin_opt_info,bin_construction,comprehensions,maps,
maps_bin_opt_info,
redundant_boolean_clauses,latin1_fallback,
- underscore,no_warnings]}].
+ underscore,no_warnings,bit_syntax]}].
init_per_suite(Config) ->
Config.
@@ -783,6 +784,50 @@ no_warnings(Config) when is_list(Config) ->
run(Config, Ts),
ok.
+bit_syntax(Config) ->
+ Ts = [{?FUNCTION_NAME,
+ <<"a(<<-1>>) -> ok;
+ a(<<1023>>) -> ok;
+ a(<<777/signed>>) -> ok;
+ a(<<a/binary>>) -> ok;
+ a(<<a/integer>>) -> ok;
+ a(<<a/float>>) -> ok;
+ a(<<a/utf8>>) -> ok;
+ a(<<a/utf16>>) -> ok;
+ a(<<a/utf32>>) -> ok;
+ a(<<a/utf32>>) -> ok.
+ b(Bin) -> Sz = bad, <<42:Sz>> = Bin.
+ c(Sz, Bin) ->
+ case Bin of
+ <<-42:Sz/unsigned>> -> ok;
+ <<42:Sz/float>> -> ok;
+ <<42:Sz/binary>> -> ok
+ end.
+ ">>,
+ [],
+ {warnings,[{1,sys_core_fold,no_clause_match},
+ {1,sys_core_fold,{nomatch_bit_syntax_unsigned,-1}},
+ {2,sys_core_fold,{nomatch_bit_syntax_truncated,
+ unsigned,1023,8}},
+ {3,sys_core_fold,{nomatch_bit_syntax_truncated,
+ signed,777,8}},
+ {4,sys_core_fold,{nomatch_bit_syntax_type,a,binary}},
+ {5,sys_core_fold,{nomatch_bit_syntax_type,a,integer}},
+ {6,sys_core_fold,{nomatch_bit_syntax_type,a,float}},
+ {7,sys_core_fold,{nomatch_bit_syntax_type,a,utf8}},
+ {8,sys_core_fold,{nomatch_bit_syntax_type,a,utf16}},
+ {9,sys_core_fold,{nomatch_bit_syntax_type,a,utf32}},
+ {10,sys_core_fold,{nomatch_bit_syntax_type,a,utf32}},
+ {11,sys_core_fold,no_clause_match},
+ {11,sys_core_fold,{nomatch_bit_syntax_size,bad}},
+ {14,sys_core_fold,{nomatch_bit_syntax_unsigned,-42}},
+ {16,sys_core_fold,{nomatch_bit_syntax_type,42,binary}}
+ ]}
+ }],
+ run(Config, Ts),
+ ok.
+
+
%%%
%%% End of test cases.
%%%