From 1cac7619bf9892f571f855a424b5542c3c1aaf81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?John=20H=C3=B6gberg?= <john@erlang.org>
Date: Tue, 5 Mar 2019 15:14:01 +0100
Subject: beam_validator: Refactor type conflict resolution

The current type conflict resolution works well for the example
case in the comment, but doesn't handle branched code properly,
consider the following:

  {label,2}.
    {test,is_tagged_tuple,{f,ignored},[{x,0},3,{atom,r}]}.
    {allocate_zero,2,1}.
    {move,{x,0},{y,0}}.
    %% {y,0} is known to be {r, _, _} now.
    {get_tuple_element,{x,0},2,{x,0}}.
    {'try',{y,1},{f,3}}.
    %% ... snip ...
    {jump,{f,5}}.
  {label,3}.
    {try_case,{y,1}}.
    %% {x,0} is the error class (an atom), {x,1} is the error term.
    {test,is_eq_exact,{f,ignored},[{x,0},{y,0}]}.
    %% ... since tuples and atoms can't meet, the type of {y,0} is
    %% now {atom,[]} because the current code assumes the type
    %% we're updating with.
    {move,{x,1},{x,0}}.
    {jump,{f,5}}.
  {label,5}.
    %% ... joining tuple (block 2) and atom (block 3) means 'term',
    %% so the get_tuple_element instruction fails to validate
    %% despite this being unrechable from block 3.
    {test_heap,3,1}.
    {get_tuple_element,{y,0},1,{x,1}}.
    {put_tuple2,{x,0},{list,[{x,1},{x,0}]}}.
    {deallocate,2}.
    return.

This commit kills the state on type conflicts, making unreachable
instructions truly unreachable.
---
 lib/compiler/src/beam_validator.erl                | 63 ++++++++++++++++------
 lib/compiler/test/beam_validator_SUITE.erl         | 29 ++++++++--
 .../beam_validator_SUITE_data/merge_undefined.S    |  4 +-
 3 files changed, 75 insertions(+), 21 deletions(-)

(limited to 'lib/compiler')

diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
index 953425fde9..139eee95ba 100644
--- a/lib/compiler/src/beam_validator.erl
+++ b/lib/compiler/src/beam_validator.erl
@@ -1710,7 +1710,7 @@ override_type(Type, Reg, Vst) ->
 
 %% This is used when linear code finds out more and more information about a
 %% type, so that the type gets more specialized.
-update_type(Merge, Type0, #value_ref{}=Ref, Vst) ->
+update_type(Merge, With, #value_ref{}=Ref, Vst) ->
     %% If the old type can't be merged with the new one, the type information
     %% is inconsistent and we know that some instructions will never be
     %% executed at run-time. For example:
@@ -1719,18 +1719,31 @@ update_type(Merge, Type0, #value_ref{}=Ref, Vst) ->
     %%   {test,is_tuple,Fail,[Reg]}.
     %%   {test,test_arity,Fail,[Reg,5]}.
     %%
-    %% Note that the test_arity instruction can never be reached, so we use the
-    %% new type instead of 'none'.
-    Type = case Merge(get_raw_type(Ref, Vst), Type0) of
-               none -> Type0;
-               T -> T
-           end,
-    set_type(Type, Ref, Vst);
-update_type(Merge, Type, {Kind,_}=Reg, Vst) when Kind =:= x; Kind =:= y ->
-    update_type(Merge, Type, get_reg_vref(Reg, Vst), Vst);
-update_type(_Merge, _Type, Literal, Vst) ->
+    %% Note that the test_arity instruction can never be reached, so we need to
+    %% kill the state to avoid raising an error when we encounter it.
+    %%
+    %% Simply returning `kill_state(Vst)` is unsafe however as we might be in
+    %% the middle of an instruction, and altering the rest of the validator
+    %% (eg. prune_x_regs/2) to no-op on dead states is prone to error.
+    %%
+    %% We therefore throw a 'type_conflict' error instead, which causes
+    %% validation to fail unless we're in a context where such errors can be
+    %% handled, such as in a branch handler.
+    Current = get_raw_type(Ref, Vst),
+    case Merge(Current, With) of
+        none -> throw({type_conflict, Current, With});
+        Type -> set_type(Type, Ref, Vst)
+    end;
+update_type(Merge, With, {Kind,_}=Reg, Vst) when Kind =:= x; Kind =:= y ->
+    update_type(Merge, With, get_reg_vref(Reg, Vst), Vst);
+update_type(Merge, With, Literal, Vst) ->
     assert_literal(Literal),
-    Vst.
+    %% Literals always retain their type, but we still need to bail on type
+    %% conflicts.
+    case Merge(Literal, With) of
+        none -> throw({type_conflict, Literal, With});
+        _Type -> Vst
+    end.
 
 update_ne_types(LHS, RHS, Vst) ->
     update_type(fun subtract/2, get_term_type(RHS, Vst), LHS, Vst).
@@ -2255,6 +2268,9 @@ glt_1(L) ->
 %% Forks the execution flow, with the provided funs returning the new state of
 %% their respective branch; the "fail" fun returns the state where the branch
 %% is taken, and the "success" fun returns the state where it's not.
+%%
+%% If either path is known not to be taken at runtime (eg. due to a type
+%% conflict), it will simply be discarded.
 -spec branch(Lbl :: label(),
              Original :: #vst{},
              FailFun :: BranchFun,
@@ -2262,10 +2278,25 @@ glt_1(L) ->
      BranchFun :: fun((#vst{}) -> #vst{}).
 branch(Lbl, Vst0, FailFun, SuccFun) ->
     #vst{current=St0} = Vst0,
-    Vst1 = FailFun(Vst0),
-    Vst2 = branch_state(Lbl, Vst1),
-    Vst = Vst2#vst{current=St0},
-    SuccFun(Vst).
+    try FailFun(Vst0) of
+        Vst1 ->
+            Vst2 = branch_state(Lbl, Vst1),
+            Vst = Vst2#vst{current=St0},
+            try SuccFun(Vst) of
+                V -> V
+            catch
+                {type_conflict, _, _} ->
+                    %% The instruction is guaranteed to fail; kill the state.
+                    kill_state(Vst)
+            end
+    catch
+        {type_conflict, _, _} ->
+            %% This instruction is guaranteed not to fail, so we run the
+            %% success branch *without* catching type conflicts to avoid hiding
+            %% errors in the validator itself; one of the branches must
+            %% succeed.
+            SuccFun(Vst0)
+    end.
 
 %% A shorthand version of branch/4 for when the state is only altered on
 %% success.
diff --git a/lib/compiler/test/beam_validator_SUITE.erl b/lib/compiler/test/beam_validator_SUITE.erl
index 265da43f9d..8b39fce479 100644
--- a/lib/compiler/test/beam_validator_SUITE.erl
+++ b/lib/compiler/test/beam_validator_SUITE.erl
@@ -34,7 +34,7 @@
 	 undef_label/1,illegal_instruction/1,failing_gc_guard_bif/1,
 	 map_field_lists/1,cover_bin_opt/1,
 	 val_dsetel/1,bad_tuples/1,bad_try_catch_nesting/1,
-         receive_stacked/1,aliased_types/1]).
+         receive_stacked/1,aliased_types/1,type_conflict/1]).
 
 -include_lib("common_test/include/ct.hrl").
 
@@ -63,7 +63,7 @@ groups() ->
        undef_label,illegal_instruction,failing_gc_guard_bif,
        map_field_lists,cover_bin_opt,val_dsetel,
        bad_tuples,bad_try_catch_nesting,
-       receive_stacked,aliased_types]}].
+       receive_stacked,aliased_types,type_conflict]}].
 
 init_per_suite(Config) ->
     test_lib:recompile(?MODULE),
@@ -156,8 +156,8 @@ call_last(Config) when is_list(Config) ->
 merge_undefined(Config) when is_list(Config) ->
     Errors = do_val(merge_undefined, Config),
     [{{t,handle_call,2},
-      {{call_ext,1,{extfunc,erlang,exit,1}},
-       10,
+      {{call_ext,2,{extfunc,debug,filter,2}},
+       22,
        {uninitialized_reg,{y,_}}}}] = Errors,
     ok.
 
@@ -630,6 +630,27 @@ aliased_types_3(Bug) ->
             hd(List)
     end.
 
+
+%% ERL-867; validation proceeded after a type conflict, causing incorrect types
+%% to be joined.
+
+-record(r, { e1 = e1, e2 = e2 }).
+
+type_conflict(Config) when is_list(Config) ->
+    {e1, e2} = type_conflict_1(#r{}),
+    ok.
+
+type_conflict_1(C) ->
+    Src = id(C#r.e2),
+    TRes = try id(Src) of
+               R -> R
+           catch
+               %% C:R can never match, yet it assumed that the type of 'C' was
+               %% an atom from here on.
+               C:R -> R
+           end,
+    {C#r.e1, TRes}.
+
 %%%-------------------------------------------------------------------------
 
 transform_remove(Remove, Module) ->
diff --git a/lib/compiler/test/beam_validator_SUITE_data/merge_undefined.S b/lib/compiler/test/beam_validator_SUITE_data/merge_undefined.S
index 481d55045d..aa344807e4 100644
--- a/lib/compiler/test/beam_validator_SUITE_data/merge_undefined.S
+++ b/lib/compiler/test/beam_validator_SUITE_data/merge_undefined.S
@@ -15,8 +15,9 @@
     {select_val,{x,0},{f,1},{list,[{atom,gurka},{f,3},{atom,delete},{f,4}]}}.
   {label,3}.
     {allocate_heap,2,6,2}.
-    %% The Y registers are not initialized here.
     {test,is_eq_exact,{f,5},[{x,0},{atom,ok}]}.
+    %% This is unreachable since {x,0} is known not to be 'ok'. We should not
+    %% fail with "uninitialized y registers" on erlang:exit/1
     {move,{atom,nisse},{x,0}}.
     {call_ext,1,{extfunc,erlang,exit,1}}.
   {label,4}.
@@ -29,6 +30,7 @@
     {call_ext,2,{extfunc,io,format,2}}.
     {test,is_ne_exact,{f,6},[{x,0},{atom,ok}]}.
   {label,5}.
+    %% The Y registers are not initialized here.
     {move,{atom,logReader},{x,1}}.
     {move,{atom,console},{x,0}}.
     {call_ext,2,{extfunc,debug,filter,2}}.
-- 
cgit v1.2.3