15 files changed, 618 insertions, 358 deletions
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile
index 961dacc6c9..074d9b881b 100644
--- a/lib/compiler/src/Makefile
+++ b/lib/compiler/src/Makefile
@@ -49,7 +49,6 @@ MODULES =  \
 	beam_a \
 	beam_asm \
 	beam_block \
-	beam_bs \
 	beam_clean \
 	beam_dict \
 	beam_disasm \
diff --git a/lib/compiler/src/beam_a.erl b/lib/compiler/src/beam_a.erl
index dd2537a699..1ac892a8f1 100644
--- a/lib/compiler/src/beam_a.erl
+++ b/lib/compiler/src/beam_a.erl
@@ -100,8 +100,12 @@ rename_instr({bs_put_utf16=I,F,Fl,Src}) ->
     {bs_put,F,{I,Fl},[Src]};
 rename_instr({bs_put_utf32=I,F,Fl,Src}) ->
     {bs_put,F,{I,Fl},[Src]};
-rename_instr({bs_put_string,_,_}=I) ->
-    {bs_put,{f,0},I,[]};
+rename_instr({bs_put_string,_,{string,String}}) ->
+    %% Only happens when compiling from .S files. In old
+    %% .S files, String is a list. In .S in OTP 22 and later,
+    %% String is a binary.
+    {bs_put,{f,0},{bs_put_binary,8,{field_flags,[unsigned,big]}},
+     [{atom,all},{literal,iolist_to_binary([String])}]};
 rename_instr({bs_add=I,F,[Src1,Src2,U],Dst}) when is_integer(U) ->
     {bif,I,F,[Src1,Src2,{integer,U}],Dst};
 rename_instr({bs_utf8_size=I,F,Src,Dst}) ->
@@ -118,8 +122,8 @@ rename_instr({bs_private_append=I,F,Sz,U,Src,Flags,Dst}) ->
     {bs_init,F,{I,U,Flags},none,[Sz,Src],Dst};
 rename_instr(bs_init_writable=I) ->
     {bs_init,{f,0},I,1,[{x,0}],{x,0}};
-rename_instr({test,Op,F,[Ctx,Bits,{string,Str}]}) ->
-    %% When compiling from a .S file.
+rename_instr({test,bs_match_string=Op,F,[Ctx,Bits,{string,Str}]}) when is_list(Str) ->
+    %% When compiling from an old .S file. Starting from OTP 22, Str is a binary.
     <<Bs:Bits/bits,_/bits>> = list_to_binary(Str),
     {test,Op,F,[Ctx,Bs]};
 rename_instr({put_map_assoc,Fail,S,D,R,L}) ->
diff --git a/lib/compiler/src/beam_asm.erl b/lib/compiler/src/beam_asm.erl
index df0321e85a..bc1290f6fd 100644
--- a/lib/compiler/src/beam_asm.erl
+++ b/lib/compiler/src/beam_asm.erl
@@ -424,8 +424,8 @@ encode_arg({f, W}, Dict) ->
     {encode(?tag_f, W), Dict};
 %% encode_arg({'char', C}, Dict) ->
 %%     {encode(?tag_h, C), Dict};
-encode_arg({string, String}, Dict0) ->
-    {Offset, Dict} = beam_dict:string(String, Dict0),
+encode_arg({string, BinString}, Dict0) when is_binary(BinString) ->
+    {Offset, Dict} = beam_dict:string(BinString, Dict0),
     {encode(?tag_u, Offset), Dict};
 encode_arg({extfunc, M, F, A}, Dict0) ->
     {Index, Dict} = beam_dict:import(M, F, A, Dict0),
diff --git a/lib/compiler/src/beam_bs.erl b/lib/compiler/src/beam_bs.erl
deleted file mode 100644
index 15d8d687fc..0000000000
--- a/lib/compiler/src/beam_bs.erl
+++ /dev/null
@@ -1,183 +0,0 @@
-%%
-%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 1999-2018. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
-%%
-%% %CopyrightEnd%
-%%
-%% Purpose: Peephole optimization of binary syntax instructions.
-
--module(beam_bs).
-
--export([module/2]).
--import(lists, [reverse/1]).
-
--spec module(beam_utils:module_code(), [compile:option()]) ->
-                    {'ok',beam_utils:module_code()}.
-
-module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
-    Fs = [function(F) || F <- Fs0],
-    {ok,{Mod,Exp,Attr,Fs,Lc}}.
-
-function({function,Name,Arity,CLabel,Is0}) ->
-    try
-	Is = bs_opt(Is0),
-	{function,Name,Arity,CLabel,Is}
-    catch
-        Class:Error:Stack ->
-	    io:fwrite("Function: ~w/~w\n", [Name,Arity]),
-	    erlang:raise(Class, Error, Stack)
-    end.
-
-%%%
-%%% Evaluate construction of constant bit fields.
-%%% Combine bs_skip_bits2 and bs_test_tail2 instructions.
-%%%
-
-bs_opt([{bs_put,_,_,_}=I|Is0]) ->
-    {BsPuts0,Is} = collect_bs_puts(Is0, [I]),
-    BsPuts = opt_bs_puts(BsPuts0),
-    BsPuts ++ bs_opt(Is);
-bs_opt([{test,bs_skip_bits2,F,[Ctx,{integer,I},Unit,_Flags]},
-            {test,bs_test_tail2,F,[Ctx,Bits]}|Is]) ->
-    [{test,bs_test_tail2,F,[Ctx,Bits+I*Unit]}|bs_opt(Is)];
-bs_opt([{test,bs_skip_bits2,F,[Ctx,{integer,I1},Unit1,Flags]},
-            {test,bs_skip_bits2,F,[Ctx,{integer,I2},Unit2,_]}|Is]) ->
-    I = {test,bs_skip_bits2,F,
-         [Ctx,{integer,I1*Unit1+I2*Unit2},1,Flags]},
-    bs_opt([I|Is]);
-bs_opt([I|Is]) ->
-    [I|bs_opt(Is)];
-bs_opt([]) -> [].
-
-collect_bs_puts([{bs_put,_,_,_}=I|Is], Acc) ->
-    collect_bs_puts(Is, [I|Acc]);
-collect_bs_puts([_|_]=Is, Acc) ->
-    {reverse(Acc),Is}.
-
-opt_bs_puts(Is) ->
-    opt_bs_1(Is, []).
-
-opt_bs_1([{bs_put,Fail,
-	   {bs_put_float,1,Flags0},[{integer,Sz},Src]}=I0|Is], Acc) ->
-    try eval_put_float(Src, Sz, Flags0) of
-	<<Int:Sz>> ->
-	    Flags = force_big(Flags0),
-	    I = {bs_put,Fail,{bs_put_integer,1,Flags},
-		 [{integer,Sz},{integer,Int}]},
-	    opt_bs_1([I|Is], Acc)
-    catch
-	error:_ ->
-	    opt_bs_1(Is, [I0|Acc])
-    end;
-opt_bs_1([{bs_put,_,{bs_put_integer,1,_},[{integer,8},{integer,_}]}|_]=IsAll,
-	 Acc0) ->
-    {Is,Acc} = bs_collect_string(IsAll, Acc0),
-    opt_bs_1(Is, Acc);
-opt_bs_1([{bs_put,Fail,{bs_put_integer,1,F},[{integer,Sz},{integer,N}]}=I|Is0],
-	 Acc) when Sz > 8 ->
-    case field_endian(F) of
-	big ->
-	    %% We can do this optimization for any field size without
-	    %% risk for code explosion.
-	    case bs_split_int(N, Sz, Fail, Is0) of
-		no_split -> opt_bs_1(Is0, [I|Acc]);
-		Is -> opt_bs_1(Is, Acc)
-	    end;
-	little when Sz < 128 ->
-	    %% We only try to optimize relatively small fields, to
-	    %% avoid an explosion in code size.
-	    <<Int:Sz>> = <<N:Sz/little>>,
-	    Flags = force_big(F),
-	    Is = [{bs_put,Fail,{bs_put_integer,1,Flags},
-		   [{integer,Sz},{integer,Int}]}|Is0],
-	    opt_bs_1(Is, Acc);
-	_ ->			      %native or too wide little field
-	    opt_bs_1(Is0, [I|Acc])
-    end;
-opt_bs_1([{bs_put,Fail,{Op,U,F},[{integer,Sz},Src]}|Is], Acc) when U > 1 ->
-    opt_bs_1([{bs_put,Fail,{Op,1,F},[{integer,U*Sz},Src]}|Is], Acc);
-opt_bs_1([I|Is], Acc) ->
-    opt_bs_1(Is, [I|Acc]);
-opt_bs_1([], Acc) -> reverse(Acc).
-
-eval_put_float(Src, Sz, Flags) when Sz =< 256 ->
-    %%Only evaluate if Sz is reasonable.
-    Val = value(Src),
-    case field_endian(Flags) of
-	little -> <<Val:Sz/little-float-unit:1>>;
-	big -> <<Val:Sz/big-float-unit:1>>
-        %% native intentionally not handled here - we can't optimize
-        %% it.
-    end.
-
-value({integer,I}) -> I;
-value({float,F}) -> F.
-
-bs_collect_string(Is, [{bs_put,_,{bs_put_string,Len,{string,Str}},[]}|Acc]) ->
-    bs_coll_str_1(Is, Len, reverse(Str), Acc);
-bs_collect_string(Is, Acc) ->
-    bs_coll_str_1(Is, 0, [], Acc).
-
-bs_coll_str_1([{bs_put,_,{bs_put_integer,U,_},[{integer,Sz},{integer,V}]}|Is],
-	      Len, StrAcc, IsAcc) when U*Sz =:= 8 ->
-    Byte = V band 16#FF,
-    bs_coll_str_1(Is, Len+1, [Byte|StrAcc], IsAcc);
-bs_coll_str_1(Is, Len, StrAcc, IsAcc) ->
-    {Is,[{bs_put,{f,0},{bs_put_string,Len,{string,reverse(StrAcc)}},[]}|IsAcc]}.
-
-field_endian({field_flags,F}) -> field_endian_1(F).
-
-field_endian_1([big=E|_]) -> E;
-field_endian_1([little=E|_]) -> E;
-field_endian_1([native=E|_]) -> E;
-field_endian_1([_|Fs]) -> field_endian_1(Fs).
-
-force_big({field_flags,F}) ->
-    {field_flags,force_big_1(F)}.
-
-force_big_1([big|_]=Fs) -> Fs;
-force_big_1([little|Fs]) -> [big|Fs];
-force_big_1([F|Fs]) -> [F|force_big_1(Fs)].
-
-bs_split_int(0, Sz, _, _) when Sz > 64 ->
-    %% We don't want to split in this case because the
-    %% string will consist of only zeroes.
-    no_split;
-bs_split_int(-1, Sz, _, _) when Sz > 64 ->
-    %% We don't want to split in this case because the
-    %% string will consist of only 255 bytes.
-    no_split;
-bs_split_int(N, Sz, Fail, Acc) ->
-    FirstByteSz = case Sz rem 8 of
-		      0 -> 8;
-		      Rem -> Rem
-		  end,
-    bs_split_int_1(N, FirstByteSz, Sz, Fail, Acc).
-
-bs_split_int_1(-1, _, Sz, Fail, Acc) when Sz > 64 ->
-    I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}},
-	 [{integer,Sz},{integer,-1}]},
-    [I|Acc];
-bs_split_int_1(0, _, Sz, Fail, Acc) when Sz > 64 ->
-    I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}},
-	 [{integer,Sz},{integer,0}]},
-    [I|Acc];
-bs_split_int_1(N, ByteSz, Sz, Fail, Acc) when Sz > 0 ->
-    Mask = (1 bsl ByteSz) - 1,
-    I = {bs_put,Fail,{bs_put_integer,1,{field_flags,[big]}},
-	 [{integer,ByteSz},{integer,N band Mask}]},
-    bs_split_int_1(N bsr ByteSz, 8, Sz-ByteSz, Fail, [I|Acc]);
-bs_split_int_1(_, _, _, _, Acc) -> Acc.
diff --git a/lib/compiler/src/beam_dict.erl b/lib/compiler/src/beam_dict.erl
index 990e86062a..b2056332e6 100644
--- a/lib/compiler/src/beam_dict.erl
+++ b/lib/compiler/src/beam_dict.erl
@@ -126,18 +126,17 @@ import(Mod0, Name0, Arity, #asm{imports=Imp0,next_import=NextIndex}=D0)
 	    {NextIndex,D2#asm{imports=Imp,next_import=NextIndex+1}}
     end.
 
-%% Returns the index for a string in the string table (adding the string to the
-%% table if necessary).
+%% Returns the index for a binary string in the string table (adding
+%% the string to the table if necessary).
 %%    string(String, Dict) -> {Offset, Dict'}
--spec string(string(), bdict()) -> {non_neg_integer(), bdict()}.
+-spec string(binary(), bdict()) -> {non_neg_integer(), bdict()}.
 
-string(Str, Dict) when is_list(Str) ->
+string(BinString, Dict) when is_binary(BinString) ->
     #asm{strings=Strings,string_offset=NextOffset} = Dict,
-    StrBin = list_to_binary(Str),
-    case old_string(StrBin, Strings) of
+    case old_string(BinString, Strings) of
 	none ->
-	    NewDict = Dict#asm{strings = <<Strings/binary,StrBin/binary>>,
-			       string_offset=NextOffset+byte_size(StrBin)},
+	    NewDict = Dict#asm{strings = <<Strings/binary,BinString/binary>>,
+			       string_offset=NextOffset+byte_size(BinString)},
 	    {NextOffset,NewDict};
 	Offset when is_integer(Offset) ->
 	    {NextOffset-Offset,Dict}
diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl
index d3facc5911..fe1a0c8480 100644
--- a/lib/compiler/src/beam_ssa_codegen.erl
+++ b/lib/compiler/src/beam_ssa_codegen.erl
@@ -1071,8 +1071,8 @@ cg_block([#cg_set{op={bif,Name},dst=Dst0,args=Args0}]=Is0, {Dst0,Fail}, St0) ->
         {z,_} ->
             %% The result of the BIF call will only be used once. Convert to
             %% a test instruction.
-            Test = bif_to_test(Name, Args, ensure_label(Fail, St0)),
-            {Test,St0};
+            {Test,St1} = bif_to_test(Name, Args, ensure_label(Fail, St0), St0),
+            {Test,St1};
         _ ->
             %% Must explicitly call the BIF since the result will be used
             %% more than once.
@@ -1269,16 +1269,17 @@ cg_copy_1([], _St) -> [].
                           element(1, Val) =:= atom orelse
                           element(1, Val) =:= literal)).
 
+bif_to_test('or', [V1,V2], {f,Lbl}=Fail, St0) when Lbl =/= 0 ->
+    {SuccLabel,St} = new_label(St0),
+    {[{test,is_eq_exact,{f,SuccLabel},[V1,{atom,false}]},
+      {test,is_eq_exact,Fail,[V2,{atom,true}]},
+      {label,SuccLabel}],St};
+bif_to_test(Op, Args, Fail, St) ->
+    {bif_to_test(Op, Args, Fail),St}.
+
 bif_to_test('and', [V1,V2], Fail) ->
     [{test,is_eq_exact,Fail,[V1,{atom,true}]},
      {test,is_eq_exact,Fail,[V2,{atom,true}]}];
-bif_to_test('or', [V1,V2], {f,Lbl}=Fail) when Lbl =/= 0 ->
-    %% Labels are spaced 2 apart. We can create a new
-    %% label by incrementing the Fail label.
-    SuccLabel = Lbl + 1,
-    [{test,is_eq_exact,{f,SuccLabel},[V1,{atom,false}]},
-     {test,is_eq_exact,Fail,[V2,{atom,true}]},
-     {label,SuccLabel}];
 bif_to_test('not', [Var], Fail) ->
     [{test,is_eq_exact,Fail,[Var,{atom,false}]}];
 bif_to_test(Name, Args, Fail) ->
@@ -2017,9 +2018,7 @@ is_gc_bif(Bif, Args) ->
 %% new_label(St) -> {L,St}.
 
 new_label(#cg{lcount=Next}=St) ->
-    %% Advance the label counter by 2 to allow us to create
-    %% a label for 'or' by incrementing an existing label.
-    {Next,St#cg{lcount=Next+2}}.
+    {Next,St#cg{lcount=Next+1}}.
 
 %% call_line(tail|body, Func, Anno) -> [] | [{line,...}].
 %%  Produce a line instruction if it will be needed by the
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index 2dda67eac6..6f7044f006 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -22,7 +22,7 @@
 -export([module/2]).
 
 -include("beam_ssa.hrl").
--import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,
+-import(lists, [append/1,foldl/3,keyfind/3,member/2,
                 reverse/1,reverse/2,
                 splitwith/2,takewhile/2,unzip/1]).
 
@@ -52,25 +52,30 @@ passes(Opts0) ->
           ?PASS(ssa_opt_coalesce_phis),
           ?PASS(ssa_opt_element),
           ?PASS(ssa_opt_linearize),
+          ?PASS(ssa_opt_tuple_size),
           ?PASS(ssa_opt_record),
 
           %% Run ssa_opt_cse twice, because it will help ssa_opt_dead,
-          %% and ssa_opt_dead will help ssa_opt_cse. Run ssa_opt_live
-          %% twice, because it will help ssa_opt_dead and ssa_opt_dead
-          %% will help ssa_opt_live.
+          %% and ssa_opt_dead will help ssa_opt_cse.
+          %%
+          %% Run ssa_opt_live twice, because it will help ssa_opt_dead
+          %% and ssa_opt_dead will help ssa_opt_live.
+          %%
+          %% Run beam_ssa_type twice, because there will be more
+          %% opportunities for optimizations after running beam_ssa_dead.
           ?PASS(ssa_opt_cse),
           ?PASS(ssa_opt_type),
           ?PASS(ssa_opt_live),
+          ?PASS(ssa_opt_bs_puts),
           ?PASS(ssa_opt_dead),
           ?PASS(ssa_opt_cse),                   %Second time.
           ?PASS(ssa_opt_float),
+          ?PASS(ssa_opt_type),                  %Second time.
           ?PASS(ssa_opt_live),                  %Second time.
 
           ?PASS(ssa_opt_bsm),
           ?PASS(ssa_opt_bsm_units),
           ?PASS(ssa_opt_bsm_shortcut),
-          ?PASS(ssa_opt_misc),
-          ?PASS(ssa_opt_tuple_size),
           ?PASS(ssa_opt_sw),
           ?PASS(ssa_opt_blockify),
           ?PASS(ssa_opt_sink),
@@ -855,14 +860,9 @@ live_opt_is([#b_set{op=succeeded,dst=SuccDst=SuccDstVar,
              #b_set{dst=Dst}=I|Is], Live0, Acc) ->
     case gb_sets:is_member(Dst, Live0) of
         true ->
-            case gb_sets:is_member(SuccDst, Live0) of
-                true ->
-                    Live1 = gb_sets:add(Dst, Live0),
-                    Live = gb_sets:delete_any(SuccDst, Live1),
-                    live_opt_is([I|Is], Live, [SuccI|Acc]);
-                false ->
-                    live_opt_is([I|Is], Live0, Acc)
-            end;
+            Live1 = gb_sets:add(Dst, Live0),
+            Live = gb_sets:delete_any(SuccDst, Live1),
+            live_opt_is([I|Is], Live, [SuccI|Acc]);
         false ->
             case live_opt_unused(I) of
                 {replace,NewI0} ->
@@ -902,7 +902,13 @@ live_opt_unused(#b_set{op=get_map_element}=Set) ->
 live_opt_unused(_) -> keep.
 
 %%%
-%%% Optimize binary matching instructions.
+%%% Optimize binary matching.
+%%%
+%%% * If the value of segment is never extracted, rewrite
+%%%   to a bs_skip instruction.
+%%%
+%%% * Coalesce adjacent bs_skip instructions and skip instructions
+%%%   with bs_test_tail.
 %%%
 
 ssa_opt_bsm(#st{ssa=Linear}=St) ->
@@ -910,9 +916,10 @@ ssa_opt_bsm(#st{ssa=Linear}=St) ->
     Extracted = cerl_sets:from_list(Extracted0),
     St#st{ssa=bsm_skip(Linear, Extracted)}.
 
-bsm_skip([{L,#b_blk{is=Is0}=Blk}|Bs], Extracted) ->
+bsm_skip([{L,#b_blk{is=Is0}=Blk}|Bs0], Extracted) ->
+    Bs = bsm_skip(Bs0, Extracted),
     Is = bsm_skip_is(Is0, Extracted),
-    [{L,Blk#b_blk{is=Is}}|bsm_skip(Bs, Extracted)];
+    coalesce_skips({L,Blk#b_blk{is=Is}}, Bs);
 bsm_skip([], _) -> [].
 
 bsm_skip_is([I0|Is], Extracted) ->
@@ -943,6 +950,63 @@ bsm_extracted([{_,#b_blk{is=Is}}|Bs]) ->
     end;
 bsm_extracted([]) -> [].
 
+coalesce_skips({L,#b_blk{is=[#b_set{op=bs_extract}=Extract|Is0],
+                         last=Last0}=Blk0}, Bs0) ->
+    case coalesce_skips_is(Is0, Last0, Bs0) of
+        not_possible ->
+            [{L,Blk0}|Bs0];
+        {Is,Last,Bs} ->
+            Blk = Blk0#b_blk{is=[Extract|Is],last=Last},
+            [{L,Blk}|Bs]
+    end;
+coalesce_skips({L,#b_blk{is=Is0,last=Last0}=Blk0}, Bs0) ->
+    case coalesce_skips_is(Is0, Last0, Bs0) of
+        not_possible ->
+            [{L,Blk0}|Bs0];
+        {Is,Last,Bs} ->
+            Blk = Blk0#b_blk{is=Is,last=Last},
+            [{L,Blk}|Bs]
+    end.
+
+coalesce_skips_is([#b_set{op=bs_match,
+                          args=[#b_literal{val=skip},
+                                Ctx0,Type,Flags,
+                                #b_literal{val=Size0},
+                                #b_literal{val=Unit0}]}=Skip0,
+                   #b_set{op=succeeded}],
+                  #b_br{succ=L2,fail=Fail}=Br0,
+                  Bs0) when is_integer(Size0) ->
+    case Bs0 of
+        [{L2,#b_blk{is=[#b_set{op=bs_match,
+                               dst=SkipDst,
+                               args=[#b_literal{val=skip},_,_,_,
+                                     #b_literal{val=Size1},
+                                     #b_literal{val=Unit1}]},
+                        #b_set{op=succeeded}=Succeeded],
+                    last=#b_br{fail=Fail}=Br}}|Bs] when is_integer(Size1) ->
+            SkipBits = Size0 * Unit0 + Size1 * Unit1,
+            Skip = Skip0#b_set{dst=SkipDst,
+                               args=[#b_literal{val=skip},Ctx0,
+                                     Type,Flags,
+                                     #b_literal{val=SkipBits},
+                                     #b_literal{val=1}]},
+            Is = [Skip,Succeeded],
+            {Is,Br,Bs};
+        [{L2,#b_blk{is=[#b_set{op=bs_test_tail,
+                               args=[_Ctx,#b_literal{val=TailSkip}]}],
+                    last=#b_br{succ=NextSucc,fail=Fail}}}|Bs] ->
+            SkipBits = Size0 * Unit0,
+            TestTail = Skip0#b_set{op=bs_test_tail,
+                                   args=[Ctx0,#b_literal{val=SkipBits+TailSkip}]},
+            Br = Br0#b_br{bool=TestTail#b_set.dst,succ=NextSucc},
+            Is = [TestTail],
+            {Is,Br,Bs};
+        _ ->
+            not_possible
+    end;
+coalesce_skips_is(_, _, _) ->
+    not_possible.
+
 %%%
 %%% Short-cutting binary matching instructions.
 %%%
@@ -1117,91 +1181,172 @@ bsm_units_join_1([], _MapA, Right) ->
     Right.
 
 %%%
-%%% Miscellanous optimizations in execution order.
+%%% Optimize binary construction.
+%%%
+%%% If an integer segment or a float segment has a literal size and
+%%% a literal value, convert to a binary segment. Coalesce adjacent
+%%% literal binary segments. Literal binary segments will be converted
+%%% to bs_put_string instructions in later pass.
 %%%
 
-ssa_opt_misc(#st{ssa=Linear}=St) ->
-    St#st{ssa=misc_opt(Linear, #{})}.
+ssa_opt_bs_puts(#st{ssa=Linear0,cnt=Count0}=St) ->
+    {Linear,Count} = opt_bs_puts(Linear0, Count0, []),
+    St#st{ssa=Linear,cnt=Count}.
 
-misc_opt([{L,#b_blk{is=Is0,last=Last0}=Blk0}|Bs], Sub0) ->
-    {Is,Sub} = misc_opt_is(Is0, Sub0, []),
-    Last = sub(Last0, Sub),
-    Blk = Blk0#b_blk{is=Is,last=Last},
-    [{L,Blk}|misc_opt(Bs, Sub)];
-misc_opt([], _) -> [].
+opt_bs_puts([{L,#b_blk{is=Is}=Blk0}|Bs], Count0, Acc0) ->
+    case Is of
+        [#b_set{op=bs_put}=I0] ->
+            case opt_bs_put(L, I0, Blk0, Count0, Acc0) of
+                not_possible ->
+                    opt_bs_puts(Bs, Count0, [{L,Blk0}|Acc0]);
+                {Count,Acc1} ->
+                    Acc = opt_bs_puts_merge(Acc1),
+                    opt_bs_puts(Bs, Count, Acc)
+            end;
+        _ ->
+            opt_bs_puts(Bs, Count0, [{L,Blk0}|Acc0])
+    end;
+opt_bs_puts([], Count, Acc) ->
+    {reverse(Acc),Count}.
+
+opt_bs_puts_merge([{L1,#b_blk{is=Is}=Blk0},{L2,#b_blk{is=AccIs}}=BAcc|Acc]) ->
+    case {AccIs,Is} of
+        {[#b_set{op=bs_put,
+                 args=[#b_literal{val=binary},
+                       #b_literal{},
+                       #b_literal{val=Bin0},
+                       #b_literal{val=all},
+                       #b_literal{val=1}]}],
+         [#b_set{op=bs_put,
+                 args=[#b_literal{val=binary},
+                       #b_literal{},
+                       #b_literal{val=Bin1},
+                       #b_literal{val=all},
+                       #b_literal{val=1}]}=I0]} ->
+            %% Coalesce the two segments to one.
+            Bin = <<Bin0/bitstring,Bin1/bitstring>>,
+            I = I0#b_set{args=bs_put_args(binary, Bin, all)},
+            Blk = Blk0#b_blk{is=[I]},
+            [{L2,Blk}|Acc];
+        {_,_} ->
+            [{L1,Blk0},BAcc|Acc]
+    end.
 
-misc_opt_is([#b_set{op=phi}=I0|Is], Sub0, Acc) ->
-    #b_set{dst=Dst,args=Args} = I = sub(I0, Sub0),
-    case all_same(Args) of
+opt_bs_put(L, I0, #b_blk{last=Br0}=Blk0, Count0, Acc) ->
+    case opt_bs_put(I0) of
+        [Bin] when is_bitstring(Bin) ->
+            Args = bs_put_args(binary, Bin, all),
+            I = I0#b_set{args=Args},
+            Blk = Blk0#b_blk{is=[I]},
+            {Count0,[{L,Blk}|Acc]};
+        [{int,Int,Size},Bin] when is_bitstring(Bin) ->
+            %% Construct a bs_put_integer instruction following
+            %% by a bs_put_binary instruction.
+            IntArgs = bs_put_args(integer, Int, Size),
+            BinArgs = bs_put_args(binary, Bin, all),
+            {BinL,BinVarNum} = {Count0,Count0+1},
+            Count = Count0 + 2,
+            BinVar = #b_var{name={'@ssa_bool',BinVarNum}},
+            BinI = I0#b_set{dst=BinVar,args=BinArgs},
+            BinBlk = Blk0#b_blk{is=[BinI],last=Br0#b_br{bool=BinVar}},
+            IntI = I0#b_set{args=IntArgs},
+            IntBlk = Blk0#b_blk{is=[IntI],last=Br0#b_br{succ=BinL}},
+            {Count,[{BinL,BinBlk},{L,IntBlk}|Acc]};
+        not_possible ->
+            not_possible
+    end.
+
+opt_bs_put(#b_set{args=[#b_literal{val=binary},_,#b_literal{val=Val},
+                        #b_literal{val=all},#b_literal{val=Unit}]})
+  when is_bitstring(Val) ->
+    if
+        bit_size(Val) rem Unit =:= 0 ->
+            [Val];
         true ->
-            %% Eliminate the phi node if there is just one source
-            %% value or if the values are identical.
-            [{Val,_}|_] = Args,
-            Sub = Sub0#{Dst=>Val},
-            misc_opt_is(Is, Sub, Acc);
-        false ->
-            misc_opt_is(Is, Sub0, [I|Acc])
-    end;
-misc_opt_is([#b_set{op={bif,'and'}}=I0], Sub, Acc) ->
-    #b_set{dst=Dst,args=Args} = I = sub(I0, Sub),
-    case eval_and(Args) of
-        error ->
-            misc_opt_is([], Sub, [I|Acc]);
-        Val ->
-            misc_opt_is([], Sub#{Dst=>Val}, Acc)
+            not_possible
     end;
-misc_opt_is([#b_set{op={bif,'or'}}=I0], Sub, Acc) ->
-    #b_set{dst=Dst,args=Args} = I = sub(I0, Sub),
-    case eval_or(Args) of
-        error ->
-            misc_opt_is([], Sub, [I|Acc]);
-        Val ->
-            misc_opt_is([], Sub#{Dst=>Val}, Acc)
-    end;
-misc_opt_is([#b_set{}=I0|Is], Sub, Acc) ->
-    #b_set{op=Op,dst=Dst,args=Args} = I = sub(I0, Sub),
-    case make_literal(Op, Args) of
-        #b_literal{}=Literal ->
-            misc_opt_is(Is, Sub#{Dst=>Literal}, Acc);
-        error ->
-            misc_opt_is(Is, Sub, [I|Acc])
+opt_bs_put(#b_set{args=[#b_literal{val=Type},#b_literal{val=Flags},
+                        #b_literal{val=Val},#b_literal{val=Size},
+                        #b_literal{val=Unit}]}=I0) when is_integer(Size) ->
+    EffectiveSize = Size * Unit,
+    if
+        EffectiveSize > 0 ->
+            case {Type,opt_bs_put_endian(Flags)} of
+                {integer,big} when is_integer(Val) ->
+                    if
+                        EffectiveSize < 64 ->
+                            [<<Val:EffectiveSize>>];
+                        true ->
+                            opt_bs_put_split_int(Val, EffectiveSize)
+                    end;
+                {integer,little} when is_integer(Val), EffectiveSize < 128 ->
+                    %% To avoid an explosion in code size, we only try
+                    %% to optimize relatively small fields.
+                    <<Int:EffectiveSize>> = <<Val:EffectiveSize/little>>,
+                    Args = bs_put_args(Type, Int, EffectiveSize),
+                    I = I0#b_set{args=Args},
+                    opt_bs_put(I);
+                {binary,_} when is_bitstring(Val) ->
+                    <<Bitstring:EffectiveSize/bits,_/bits>> = Val,
+                    [Bitstring];
+                {float,Endian} ->
+                    try
+                        [opt_bs_put_float(Val, EffectiveSize, Endian)]
+                    catch error:_ ->
+                            not_possible
+                    end;
+                {_,_} ->
+                    not_possible
+            end;
+        true ->
+            not_possible
     end;
-misc_opt_is([], Sub, Acc) ->
-    {reverse(Acc),Sub}.
+opt_bs_put(#b_set{}) -> not_possible.
 
-all_same([{H,_}|T]) ->
-    all(fun({E,_}) -> E =:= H end, T).
-
-make_literal(put_tuple, Args) ->
-    case make_literal_list(Args, []) of
-        error ->
-            error;
-        List ->
-            #b_literal{val=list_to_tuple(List)}
-    end;
-make_literal(put_list, [#b_literal{val=H},#b_literal{val=T}]) ->
-    #b_literal{val=[H|T]};
-make_literal(_, _) -> error.
+opt_bs_put_float(N, Sz, Endian) ->
+    case Endian of
+        big -> <<N:Sz/big-float-unit:1>>;
+        little -> <<N:Sz/little-float-unit:1>>
+    end.
 
-make_literal_list([#b_literal{val=H}|T], Acc) ->
-    make_literal_list(T, [H|Acc]);
-make_literal_list([_|_], _) ->
-    error;
-make_literal_list([], Acc) ->
-    reverse(Acc).
-
-eval_and(Args) ->
-    case Args of
-        [_,#b_literal{val=false}=Res] -> Res;
-        [Res,#b_literal{val=true}] -> Res;
-        [_,_] -> error
+bs_put_args(Type, Val, Size) ->
+    [#b_literal{val=Type},
+     #b_literal{val=[unsigned,big]},
+     #b_literal{val=Val},
+     #b_literal{val=Size},
+     #b_literal{val=1}].
+
+opt_bs_put_endian([big=E|_]) -> E;
+opt_bs_put_endian([little=E|_]) -> E;
+opt_bs_put_endian([native=E|_]) -> E;
+opt_bs_put_endian([_|Fs]) -> opt_bs_put_endian(Fs).
+
+opt_bs_put_split_int(Int, Size) ->
+    Pos = opt_bs_put_split_int_1(Int, 0, Size - 1),
+    UpperSize = Size - Pos,
+    if
+        Pos =:= 0 ->
+            %% Value is 0 or -1 -- keep the original instruction.
+            not_possible;
+        UpperSize < 64 ->
+            %% No or few leading zeroes or ones.
+            [<<Int:Size>>];
+        true ->
+            %% There are 64 or more leading ones or zeroes in
+            %% the resulting binary. Split into two separate
+            %% segments to avoid an explosion in code size.
+            [{int,Int bsr Pos,UpperSize},<<Int:Pos>>]
     end.
 
-eval_or(Args) ->
-    case Args of
-        [Res,#b_literal{val=false}] -> Res;
-        [_,#b_literal{val=true}=Res] -> Res;
-        [_,_] -> error
+opt_bs_put_split_int_1(_Int, L, R) when L > R ->
+    8 * ((L + 7) div 8);
+opt_bs_put_split_int_1(Int, L, R) ->
+    Mid = (L + R) div 2,
+    case Int bsr Mid of
+        Upper when Upper =:= 0; Upper =:= -1 ->
+            opt_bs_put_split_int_1(Int, L, Mid - 1);
+        _ ->
+            opt_bs_put_split_int_1(Int, Mid + 1, R)
     end.
 
 %%%
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
index 56fe9b4793..fa1b7bb71e 100644
--- a/lib/compiler/src/beam_ssa_pre_codegen.erl
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -1996,18 +1996,26 @@ reserve_zregs(Blocks, Intervals, Res) ->
 reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
               #b_set{op={bif,'=:='},args=[Dst,Val]}], Last, ShortLived, A0) ->
     case {Val,Last} of
-        {#b_literal{val=Arity},#b_br{}} when Arity bsr 32 =:= 0 ->
+        {#b_literal{val=Arity},#b_br{bool=#b_var{}}} when Arity bsr 32 =:= 0 ->
             %% These two instructions can be combined to a test_arity
             %% instruction provided that the arity variable is short-lived.
             reserve_zreg_1(Dst, ShortLived, A0);
         {_,_} ->
-            %% Either the arity is too big, or the boolean value from
-            %% '=:=' will be returned.
+            %% Either the arity is too big, or the boolean value is not
+            %% used in a conditional branch.
             A0
     end;
 reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
              #b_switch{}, ShortLived, A) ->
     reserve_zreg_1(Dst, ShortLived, A);
+reserve_zreg([#b_set{op={bif,'xor'}}], _Last, _ShortLived, A) ->
+    %% There is no short, easy way to rewrite 'xor' to a series of
+    %% test instructions.
+    A;
+reserve_zreg([#b_set{op={bif,is_record}}], _Last, _ShortLived, A) ->
+    %% There is no short, easy way to rewrite is_record/2 to a series of
+    %% test instructions.
+    A;
 reserve_zreg([#b_set{op=Op,dst=Dst}|Is], Last, ShortLived, A0) ->
     IsZReg = case Op of
                  bs_match_string -> true;
diff --git a/lib/compiler/src/beam_ssa_type.erl b/lib/compiler/src/beam_ssa_type.erl
index 95fc3bb0e9..ede57875e2 100644
--- a/lib/compiler/src/beam_ssa_type.erl
+++ b/lib/compiler/src/beam_ssa_type.erl
@@ -23,7 +23,7 @@
 
 -include("beam_ssa.hrl").
 -import(lists, [all/2,any/2,droplast/1,foldl/3,last/1,member/2,
-                reverse/1,sort/1]).
+                partition/2,reverse/1,sort/1]).
 
 -define(UNICODE_INT, #t_integer{elements={0,16#10FFFF}}).
 
@@ -139,6 +139,19 @@ opt_is([#b_set{op=phi,dst=Dst,args=Args0}=I0|Is],
             Ds = Ds0#{Dst=>I},
             opt_is(Is, Ts, Ds, Ls, Sub0, [I|Acc])
     end;
+opt_is([#b_set{op=succeeded,args=Args0,dst=Dst}=I],
+       Ts0, Ds0, Ls, Sub0, Acc) ->
+    Args = simplify_args(Args0, Sub0, Ts0),
+    Type = type(succeeded, Args, Ts0, Ds0),
+    case get_literal_from_type(Type) of
+        #b_literal{}=Lit ->
+            Sub = Sub0#{Dst=>Lit},
+            opt_is([], Ts0, Ds0, Ls, Sub, Acc);
+        none ->
+            Ts = update_types(I, Ts0, Ds0),
+            Ds = Ds0#{Dst=>I},
+            opt_is([], Ts, Ds, Ls, Sub0, [I|Acc])
+    end;
 opt_is([#b_set{args=Args0,dst=Dst}=I0|Is],
        Ts0, Ds0, Ls, Sub0, Acc) ->
     Args = simplify_args(Args0, Sub0, Ts0),
@@ -153,8 +166,15 @@ opt_is([#b_set{args=Args0,dst=Dst}=I0|Is],
             Sub = Sub0#{Dst=>Lit},
             opt_is(Is, Ts0, Ds0, Ls, Sub, Acc);
         #b_var{}=Var ->
-            Sub = Sub0#{Dst=>Var},
-            opt_is(Is, Ts0, Ds0, Ls, Sub, Acc)
+            case Is of
+                [#b_set{op=succeeded,dst=SuccDst,args=[Dst]}] ->
+                    %% We must remove this 'succeeded' instruction.
+                    Sub = Sub0#{Dst=>Var,SuccDst=>#b_literal{val=true}},
+                    opt_is([], Ts0, Ds0, Ls, Sub, Acc);
+                _ ->
+                    Sub = Sub0#{Dst=>Var},
+                    opt_is(Is, Ts0, Ds0, Ls, Sub, Acc)
+            end
     end;
 opt_is([], Ts, Ds, _Ls, Sub, Acc) ->
     {reverse(Acc),Ts,Ds,Sub}.
@@ -289,8 +309,6 @@ simplify(#b_set{op=put_tuple,args=Args}=I, _Ts) ->
         none -> I;
         List -> #b_literal{val=list_to_tuple(List)}
     end;
-simplify(#b_set{op=succeeded,args=[#b_literal{}]}, _Ts) ->
-    #b_literal{val=true};
 simplify(#b_set{op=wait_timeout,args=[#b_literal{val=infinity}]}=I, _Ts) ->
     I#b_set{op=wait,args=[]};
 simplify(I, _Ts) -> I.
@@ -436,12 +454,12 @@ update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts0, D0) ->
             %% no need to include the type database passed on to the
             %% successors of this block.
             Ts = maps:remove(Bool, Ts0),
-            D = update_successor(Fail, Ts, D0),
-            SuccTs = infer_types(Bool, Ts, D0),
+            {SuccTs,FailTs} = infer_types(Bool, Ts, D0),
+            D = update_successor(Fail, FailTs, D0),
             update_successor(Succ, SuccTs, D);
         false ->
-            D = update_successor_bool(Bool, false, Fail, Ts0, D0),
-            SuccTs = infer_types(Bool, Ts0, D0),
+            {SuccTs,FailTs} = infer_types(Bool, Ts0, D0),
+            D = update_successor_bool(Bool, false, Fail, FailTs, D0),
             update_successor_bool(Bool, true, Succ, SuccTs, D)
     end;
 update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts0, D0) ->
@@ -458,7 +476,8 @@ update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts0, D0) ->
                 end,
             foldl(F, D, List);
         false ->
-            D = update_successor(Fail, Ts0, D0),
+            FailTs = subtract_types([{V,join_sw_list(List, Ts0, none)}], Ts0),
+            D = update_successor(Fail, FailTs, D0),
             F = fun({Val,S}, A) ->
                         T = get_type(Val, Ts0),
                         update_successor(S, Ts0#{V=>T}, A)
@@ -467,6 +486,10 @@ update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts0, D0) ->
         end;
 update_successors(#b_ret{}, _Ts, D) -> D.
 
+join_sw_list([{Val,_}|T], Ts, Type) ->
+    join_sw_list(T, Ts, join(Type, get_type(Val, Ts)));
+join_sw_list([], _, Type) -> Type.
+
 update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) ->
     case t_is_boolean(get_type(Var, Ts)) of
         true ->
@@ -542,6 +565,14 @@ type(call, [#b_remote{mod=#b_literal{val=Mod},
                 {_,_} ->
                     #t_tuple{}
             end;
+        {erlang,'++',[List1,List2]} ->
+            case get_type(List1, Ts) =:= cons orelse
+                get_type(List2, Ts) =:= cons of
+                true -> cons;
+                false -> list
+            end;
+        {erlang,'--',[_,_]} ->
+            list;
         {math,_,_} ->
             case is_math_bif(Name, length(Args)) of
                 false -> any;
@@ -607,6 +638,8 @@ type(succeeded, [#b_var{}=Src], Ts, Ds) ->
         #b_set{} ->
             t_boolean()
     end;
+type(succeeded, [#b_literal{}], _Ts, _Ds) ->
+    t_atom(true);
 type(_, _, _, _) -> any.
 
 arith_op_type(Args, Ts) ->
@@ -873,10 +906,84 @@ get_type(#b_literal{val=Val}, _Ts) ->
             any
     end.
 
-infer_types(#b_var{}=V, Ts, #d{ds=Ds}) ->
+%% infer_types(Var, Types, #d{}) -> {SuccTypes,FailTypes}
+%%  Looking at the expression that defines the variable Var, infer
+%%  the types for the variables in the arguments. Return the updated
+%%  type database for the case that the expression evaluates to
+%%  true, and and for the case that it evaluates to false.
+%%
+%%  Here is an example. The variable being asked about is
+%%  the variable Bool, which is defined like this:
+%%
+%%     Bool = is_nonempty_list L
+%%
+%%  If 'is_nonempty_list L' evaluates to 'true', L must
+%%  must be cons. The meet of the previously known type of L and 'cons'
+%%  will be added to SuccTypes.
+%%
+%%  On the other hand, if 'is_nonempty_list L' evaluates to false, L
+%%  is not cons and cons can be subtracted from the previously known
+%%  type for L. For example, if L was known to be 'list', subtracting
+%%  'cons' would give 'nil' as the only possible type. The result of the
+%%  subtraction for L will be added to FailTypes.
+%%
+%%  Here is another example, asking about the variable Bool:
+%%
+%%     Head = bif:hd L
+%%     Bool = succeeded Head
+%%
+%%  'succeeded Head' will evaluate to 'true' if the instrution that
+%%  defined Head succeeded. In this case, it is the 'bif:hd L'
+%%  instruction, which will succeed if L is 'cons'. Thus, the meet of
+%%  the previous type for L and 'cons' will be added to SuccTypes.
+%%
+%%  If 'succeeded Head' evaluates to 'false', it means that 'bif:hd L'
+%%  failed and that L is not 'cons'. 'cons' can be subtracted from the
+%%  previously known type for L and the result put in FailTypes.
+
+infer_types(#b_var{}=V, Ts, #d{ds=Ds,once=Once}) ->
     #{V:=#b_set{op=Op,args=Args}} = Ds,
-    Types = infer_type(Op, Args, Ds),
-    meet_types(Types, Ts).
+    Types0 = infer_type(Op, Args, Ds),
+
+    %% We must be careful with types inferred from '=:='.
+    %%
+    %% If we have seen L =:= [a], we know that L is 'cons' if the
+    %% comparison succeeds. However, if the comparison fails, L could
+    %% still be 'cons'. Therefore, we must not subtract 'cons' from the
+    %% previous type of L.
+    %%
+    %% However, it is safe to subtract a type inferred from '=:=' if
+    %% it is single-valued, e.g. if it is [] or the atom 'true'.
+    EqTypes0 = infer_eq_type(Op, Args, Ts, Ds),
+    {Types1,EqTypes} = partition(fun({_,T}) ->
+                                         is_singleton_type(T)
+                                 end, EqTypes0),
+
+    %% Don't bother updating the types for variables that
+    %% are never used again.
+    Types2 = Types1 ++ Types0,
+    Types = [P || {InfV,_}=P <- Types2, not cerl_sets:is_element(InfV, Once)],
+
+    {meet_types(EqTypes++Types, Ts),subtract_types(Types, Ts)}.
+
+infer_eq_type({bif,'=:='}, [#b_var{}=Src,#b_literal{}=Lit], Ts, Ds) ->
+    Def = maps:get(Src, Ds),
+    Type = get_type(Lit, Ts),
+    [{Src,Type}|infer_tuple_size(Def, Lit) ++
+         infer_first_element(Def, Lit)];
+infer_eq_type({bif,'=:='}, [#b_var{}=Arg0,#b_var{}=Arg1], Ts, _Ds) ->
+    %% As an example, assume that L1 is known to be 'list', and L2 is
+    %% known to be 'cons'. Then if 'L1 =:= L2' evaluates to 'true', it can
+    %% be inferred that L1 is 'cons' (the meet of 'cons' and 'list').
+    Type0 = get_type(Arg0, Ts),
+    Type1 = get_type(Arg1, Ts),
+    Type = meet(Type0, Type1),
+    [{V,MeetType} ||
+        {V,OrigType,MeetType} <-
+            [{Arg0,Type0,Type},{Arg1,Type1,Type}],
+        OrigType =/= MeetType];
+infer_eq_type(_Op, _Args, _Ts, _Ds) ->
+    [].
 
 infer_type({bif,element}, [#b_literal{val=Pos},#b_var{}=Tuple], _Ds) ->
     if
@@ -885,20 +992,27 @@ infer_type({bif,element}, [#b_literal{val=Pos},#b_var{}=Tuple], _Ds) ->
         true ->
             []
     end;
-infer_type({bif,'=:='}, [#b_var{}=Src,#b_literal{}=Lit], Ds) ->
-    Def = maps:get(Src, Ds),
-    Type = get_type(Lit, #{}),
-    [{Src,Type}|infer_tuple_size(Def, Lit) ++
-         infer_first_element(Def, Lit)];
+infer_type({bif,element}, [#b_var{}=Position,#b_var{}=Tuple], _Ds) ->
+    [{Position,t_integer()},{Tuple,#t_tuple{}}];
 infer_type({bif,Bif}, [#b_var{}=Src]=Args, _Ds) ->
     case inferred_bif_type(Bif, Args) of
         any -> [];
         T -> [{Src,T}]
     end;
+infer_type({bif,binary_part}, [#b_var{}=Src,_], _Ds) ->
+    [{Src,{binary,8}}];
 infer_type({bif,is_map_key}, [_,#b_var{}=Src], _Ds) ->
     [{Src,map}];
 infer_type({bif,map_get}, [_,#b_var{}=Src], _Ds) ->
     [{Src,map}];
+infer_type({bif,Bif}, [_,_]=Args, _Ds) ->
+    case inferred_bif_type(Bif, Args) of
+        any -> [];
+        T -> [{A,T} || #b_var{}=A <- Args]
+    end;
+infer_type({bif,binary_part}, [#b_var{}=Src,Pos,Len], _Ds) ->
+    [{Src,{binary,8}}|
+     [{V,t_integer()} || #b_var{}=V <- [Pos,Len]]];
 infer_type(bs_start_match, [#b_var{}=Bin], _Ds) ->
     [{Bin,{binary,1}}];
 infer_type(is_nonempty_list, [#b_var{}=Src], _Ds) ->
@@ -969,6 +1083,7 @@ inferred_bif_type(is_number, [_]) -> number;
 inferred_bif_type(is_tuple, [_]) -> #t_tuple{};
 inferred_bif_type(abs, [_]) -> number;
 inferred_bif_type(bit_size, [_]) -> {binary,1};
+inferred_bif_type('bnot', [_]) -> t_integer();
 inferred_bif_type(byte_size, [_]) -> {binary,1};
 inferred_bif_type(ceil, [_]) -> number;
 inferred_bif_type(float, [_]) -> number;
@@ -976,10 +1091,25 @@ inferred_bif_type(floor, [_]) -> number;
 inferred_bif_type(hd, [_]) -> cons;
 inferred_bif_type(length, [_]) -> list;
 inferred_bif_type(map_size, [_]) -> map;
+inferred_bif_type('not', [_]) -> t_boolean();
 inferred_bif_type(round, [_]) -> number;
 inferred_bif_type(trunc, [_]) -> number;
 inferred_bif_type(tl, [_]) -> cons;
 inferred_bif_type(tuple_size, [_]) -> #t_tuple{};
+inferred_bif_type('and', [_,_]) -> t_boolean();
+inferred_bif_type('or', [_,_]) -> t_boolean();
+inferred_bif_type('xor', [_,_]) -> t_boolean();
+inferred_bif_type('band', [_,_]) -> t_integer();
+inferred_bif_type('bor', [_,_]) -> t_integer();
+inferred_bif_type('bsl', [_,_]) -> t_integer();
+inferred_bif_type('bsr', [_,_]) -> t_integer();
+inferred_bif_type('bxor', [_,_]) -> t_integer();
+inferred_bif_type('div', [_,_]) -> t_integer();
+inferred_bif_type('rem', [_,_]) -> t_integer();
+inferred_bif_type('+', [_,_]) -> number;
+inferred_bif_type('-', [_,_]) -> number;
+inferred_bif_type('*', [_,_]) -> number;
+inferred_bif_type('/', [_,_]) -> number;
 inferred_bif_type(_, _) -> any.
 
 infer_tuple_size(#b_set{op={bif,tuple_size},args=[#b_var{}=Tuple]},
@@ -1088,6 +1218,9 @@ t_tuple_size(#t_tuple{size=Size,exact=true}) ->
 t_tuple_size(_) ->
     none.
 
+is_singleton_type(Type) ->
+    get_literal_from_type(Type) =/= none.
+
 %% join(Type1, Type2) -> Type
 %%  Return the "join" of Type1 and Type2. The join is a more general
 %%  type than Type1 and Type2. For example:
@@ -1152,14 +1285,40 @@ gcd(A, B) ->
 
 meet_types([{V,T0}|Vs], Ts) ->
     #{V:=T1} = Ts,
-    T = meet(T0, T1),
-    meet_types(Vs, Ts#{V:=T});
+    case meet(T0, T1) of
+        T1 -> meet_types(Vs, Ts);
+        T -> meet_types(Vs, Ts#{V:=T})
+    end;
 meet_types([], Ts) -> Ts.
 
 meet([T1,T2|Ts]) ->
     meet([meet(T1, T2)|Ts]);
 meet([T]) -> T.
 
+subtract_types([{V,T0}|Vs], Ts) ->
+    #{V:=T1} = Ts,
+    case subtract(T1, T0) of
+        T1 -> subtract_types(Vs, Ts);
+        T -> subtract_types(Vs, Ts#{V:=T})
+    end;
+subtract_types([], Ts) -> Ts.
+
+%% subtract(Type1, Type2) -> Type.
+%%  Subtract Type2 from Type1. Example:
+%%
+%%     subtract(list, cons) -> nil
+
+subtract(#t_atom{elements=[_|_]=Set0}, #t_atom{elements=[_|_]=Set1}) ->
+    case ordsets:subtract(Set0, Set1) of
+        [] -> none;
+        [_|_]=Set -> #t_atom{elements=Set}
+    end;
+subtract(number, float) -> #t_integer{};
+subtract(number, #t_integer{elements=any}) -> float;
+subtract(list, cons) -> nil;
+subtract(list, nil) -> cons;
+subtract(T, _) -> T.
+
 %% meet(Type1, Type2) -> Type
 %%  Return the "meet" of Type1 and Type2. The meet is a narrower
 %%  type than Type1 and Type2. For example:
diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
index 1945faba7f..3d53054f69 100644
--- a/lib/compiler/src/beam_validator.erl
+++ b/lib/compiler/src/beam_validator.erl
@@ -809,6 +809,14 @@ valfun_4({test,has_map_fields,{f,Lbl},Src,{list,List}}, Vst) ->
     assert_type(map, Src, Vst),
     assert_unique_map_keys(List),
     branch_state(Lbl, Vst);
+valfun_4({test,is_list,{f,Lbl},[Src]}, Vst) ->
+    validate_src([Src], Vst),
+    Type = case get_term_type(Src, Vst) of
+               cons -> cons;
+               nil -> nil;
+               _ -> list
+           end,
+    set_aliased_type(Type, Src, branch_state(Lbl, Vst));
 valfun_4({test,is_map,{f,Lbl},[Src]}, Vst0) ->
     Vst = branch_state(Lbl, Vst0),
     case Src of
@@ -820,20 +828,28 @@ valfun_4({test,is_map,{f,Lbl},[Src]}, Vst0) ->
 	_ ->
 	    kill_state(Vst0)
     end;
+valfun_4({test,is_nil,{f,Lbl},[Src]}, Vst0) ->
+    Vst = case get_term_type(Src, Vst0) of
+               list ->
+                   branch_state(Lbl, set_type_reg(cons, Src, Vst0));
+               _ ->
+                   branch_state(Lbl, Vst0)
+           end,
+    set_aliased_type(nil, Src, Vst);
 valfun_4({test,is_eq_exact,{f,Lbl},[Src,Val]=Ss}, Vst0) ->
     validate_src(Ss, Vst0),
     Infer = infer_types(Src, Vst0),
     Vst1 = Infer(Val, Vst0),
-    Vst = branch_state(Lbl, Vst1),
-    case Val of
-        {literal,Tuple} when is_tuple(Tuple) ->
-            Type0 = get_term_type(Val, Vst),
-            Type = upgrade_tuple_type({tuple,tuple_size(Tuple)},
-                                      Type0),
-            set_aliased_type(Type, Src, Vst);
-        _ ->
-            Vst
-    end;
+    Vst2 = upgrade_ne_types(Src, Val, Vst1),
+    Vst3 = branch_state(Lbl, Vst2),
+    Vst = Vst3#vst{current=Vst1#vst.current},
+    upgrade_eq_types(Src, Val, Vst);
+valfun_4({test,is_ne_exact,{f,Lbl},[Src,Val]=Ss}, Vst0) ->
+    validate_src(Ss, Vst0),
+    Vst1 = upgrade_eq_types(Src, Val, Vst0),
+    Vst2 = branch_state(Lbl, Vst1),
+    Vst = Vst2#vst{current=Vst0#vst.current},
+    upgrade_ne_types(Src, Val, Vst);
 valfun_4({test,_Op,{f,Lbl},Src}, Vst) ->
     validate_src(Src, Vst),
     branch_state(Lbl, Vst);
@@ -920,6 +936,25 @@ valfun_4({get_map_elements,{f,Fail},Src,{list,List}}, Vst) ->
 valfun_4(_, _) ->
     error(unknown_instruction).
 
+upgrade_ne_types(Src1, Src2, Vst0) ->
+    T1 = get_durable_term_type(Src1, Vst0),
+    T2 = get_durable_term_type(Src2, Vst0),
+    Type = subtract(T1, T2),
+    set_aliased_type(Type, Src1, Vst0).
+
+upgrade_eq_types(Src1, Src2, Vst0) ->
+    T1 = get_durable_term_type(Src1, Vst0),
+    T2 = get_durable_term_type(Src2, Vst0),
+    Meet = meet(T1, T2),
+    Vst = case T1 =/= Meet of
+              true -> set_aliased_type(Meet, Src1, Vst0);
+              false -> Vst0
+          end,
+    case T2 =/= Meet of
+        true -> set_aliased_type(Meet, Src2, Vst);
+        false -> Vst
+    end.
+
 verify_get_map(Fail, Src, List, Vst0) ->
     assert_not_literal(Src),                    %OTP 22.
     assert_type(map, Src, Vst0),
@@ -1509,12 +1544,16 @@ assert_not_literal(Literal) -> error({literal_not_allowed,Literal}).
 %%
 %% term			Any valid Erlang (but not of the special types above).
 %%
+%% binary               Binary or bitstring.
+%%
 %% bool			The atom 'true' or the atom 'false'.
 %%
 %% cons         	Cons cell: [_|_]
 %%
 %% nil			Empty list: []
 %%
+%% list                 List: [] or [_|_]
+%%
 %% {tuple,[Sz]}		Tuple. An element has been accessed using
 %%              	element/2 or setelement/3 so that it is known that
 %%              	the type is a tuple of size at least Sz.
@@ -1535,7 +1574,7 @@ assert_not_literal(Literal) -> error({literal_not_allowed,Literal}).
 %%
 %% map			Map.
 %%
-%%
+%% none                 A conflict in types. There will be an exception at runtime.
 %%
 %% FRAGILITY
 %% ---------
@@ -1548,6 +1587,47 @@ assert_not_literal(Literal) -> error({literal_not_allowed,Literal}).
 %% Such terms are wrapped in a {fragile,Type} tuple, where Type is one
 %% of the types described above.
 
+%% meet(Type1, Type2) -> Type
+%%  Return the meet of two types. The meet is a more specific type.
+%%  It will be 'none' if the types are in conflict.
+
+meet(Same, Same) ->
+    Same;
+meet(term, Other) ->
+    Other;
+meet(Other, term) ->
+    Other;
+meet(T1, T2) ->
+    case {erlang:min(T1, T2),erlang:max(T1, T2)} of
+        {{atom,_}=A,{atom,[]}} -> A;
+        {bool,{atom,B}=Atom} when is_boolean(B) -> Atom;
+        {bool,{atom,[]}} -> bool;
+        {cons,list} -> cons;
+        {{float,_}=T,{float,[]}} -> T;
+        {{integer,_}=T,{integer,[]}} -> T;
+        {list,nil} -> nil;
+        {number,{integer,_}=T} -> T;
+        {number,{float,_}=T} -> T;
+        {{tuple,Size1},{tuple,Size2}} ->
+            case {Size1,Size2} of
+                {[Sz1],[Sz2]} ->
+                    {tuple,[erlang:max(Sz1, Sz2)]};
+                {Sz1,[Sz2]} when Sz2 =< Sz1 ->
+                    {tuple,Sz1};
+                {_,_} ->
+                    none
+            end;
+        {_,_} -> none
+    end.
+
+%% subtract(Type1, Type2) -> Type
+%%  Subtract Type2 from Type2. Example:
+%%      subtract(list, nil) -> cons
+
+subtract(list, nil) -> cons;
+subtract(list, cons) -> nil;
+subtract(Type, _) -> Type.
+
 assert_type(WantedType, Term, Vst) ->
     case get_term_type(Term, Vst) of
         {fragile,Type} ->
@@ -1581,25 +1661,27 @@ assert_type(Needed, Actual) ->
 %%  be executed at run-time.
 
 upgrade_tuple_type(NewType, {fragile,OldType}) ->
-    make_fragile(upgrade_tuple_type_1(NewType, OldType));
+    Type = upgrade_tuple_type_1(NewType, OldType),
+    make_fragile(Type);
 upgrade_tuple_type(NewType, OldType) ->
     upgrade_tuple_type_1(NewType, OldType).
 
-upgrade_tuple_type_1({tuple,[Sz]}, {tuple,[OldSz]}=T) when Sz < OldSz ->
-    %% The old type has a higher value for the least tuple size.
-    T;
-upgrade_tuple_type_1({tuple,[Sz]}, {tuple,OldSz}=T)
-  when is_integer(Sz), is_integer(OldSz), Sz =< OldSz ->
-    %% The old size is exact, and the new size is smaller than the old size.
-    T;
-upgrade_tuple_type_1({tuple,_}=T, _) ->
-    %% The new type information is exact or has a higher value for
-    %% the least tuple size.
-    %%     Note that inconsistencies are also handled in this
-    %% clause, e.g. if the old type was an integer or a tuple accessed
-    %% outside its size; inconsistences will generally cause an exception
-    %% at run-time but are safe from our point of view.
-    T.
+upgrade_tuple_type_1(NewType, OldType) ->
+    case meet(NewType, OldType) of
+        none ->
+            %% Unoptimized code may look like this:
+            %%
+            %%   {test,is_list,Fail,[Reg]}.
+            %%   {test,is_tuple,Fail,[Reg]}.
+            %%   {test,test_arity,Fail,[Reg,5]}.
+            %%
+            %% Note that the test_arity instruction can never be reached.
+            %% To make sure it's not rejected, set the type of Reg to
+            %% NewType instead of 'none'.
+            NewType;
+        Type ->
+            Type
+    end.
 
 get_tuple_size({integer,[]}) -> 0;
 get_tuple_size({integer,Sz}) -> Sz;
@@ -1608,6 +1690,17 @@ get_tuple_size(_) -> 0.
 validate_src(Ss, Vst) when is_list(Ss) ->
     foreach(fun(S) -> get_term_type(S, Vst) end, Ss).
 
+%% get_durable_term_type(Src, ValidatorState) -> Type
+%%  Get the type of the source Src. The returned type Type will be
+%%  a standard Erlang type (no catch/try tags or match contexts).
+%%  Fragility will be stripped.
+
+get_durable_term_type(Src, Vst) ->
+    case get_term_type(Src, Vst) of
+        {fragile,Type} -> Type;
+        Type -> Type
+    end.
+
 %% get_move_term_type(Src, ValidatorState) -> Type
 %%  Get the type of the source Src. The returned type Type will be
 %%  a standard Erlang type (no catch/try tags). Match contexts are OK.
@@ -1641,6 +1734,8 @@ get_term_type_1(nil=T, _) -> T;
 get_term_type_1({atom,A}=T, _) when is_atom(A) -> T;
 get_term_type_1({float,F}=T, _) when is_float(F) -> T;
 get_term_type_1({integer,I}=T, _) when is_integer(I) -> T;
+get_term_type_1({literal,[_|_]}, _) -> cons;
+get_term_type_1({literal,Bitstring}, _) when is_bitstring(Bitstring) -> binary;
 get_term_type_1({literal,Map}, _) when is_map(Map) -> map;
 get_term_type_1({literal,Tuple}, _) when is_tuple(Tuple) ->
     {tuple,tuple_size(Tuple)};
@@ -1809,6 +1904,10 @@ merge_types({atom,A}, bool) ->
     merge_bool(A);
 merge_types(cons, {literal,[_|_]}) ->
     cons;
+merge_types(cons, nil) ->
+    list;
+merge_types(nil, cons) ->
+    list;
 merge_types({literal,[_|_]}, cons) ->
     cons;
 merge_types({literal,[_|_]}, {literal,[_|_]}) ->
@@ -2041,6 +2140,14 @@ return_type_1(erlang, setelement, 3, Vst) ->
 	{integer,I} -> upgrade_tuple_type({tuple,[I]}, TupleType);
 	_ -> TupleType
     end;
+return_type_1(erlang, '++', 2, Vst) ->
+    case get_term_type({x,0}, Vst) =:= cons orelse
+        get_term_type({x,1}, Vst) =:= cons of
+        true -> cons;
+        false -> list
+    end;
+return_type_1(erlang, '--', 2, _Vst) ->
+    list;
 return_type_1(erlang, F, A, _) ->
     return_type_erl(F, A);
 return_type_1(math, F, A, _) ->
diff --git a/lib/compiler/src/beam_z.erl b/lib/compiler/src/beam_z.erl
index 677094b3cd..415b579240 100644
--- a/lib/compiler/src/beam_z.erl
+++ b/lib/compiler/src/beam_z.erl
@@ -71,6 +71,31 @@ undo_renames([{get_hd,Src,Dst1},{get_tl,Src,Dst2}|Is]) ->
     [{get_list,Src,Dst1,Dst2}|undo_renames(Is)];
 undo_renames([{get_tl,Src,Dst2},{get_hd,Src,Dst1}|Is]) ->
     [{get_list,Src,Dst1,Dst2}|undo_renames(Is)];
+undo_renames([{bs_put,_,{bs_put_binary,1,_},
+               [{atom,all},{literal,<<>>}]}|Is]) ->
+    undo_renames(Is);
+undo_renames([{bs_put,Fail,{bs_put_binary,1,_Flags},
+               [{atom,all},{literal,BinString}]}|Is0]) ->
+    Bits = bit_size(BinString),
+    Bytes = Bits div 8,
+    case Bits rem 8 of
+        0 ->
+            I = {bs_put_string,byte_size(BinString),
+                 {string,BinString}},
+            [undo_rename(I)|undo_renames(Is0)];
+        Rem ->
+            <<Binary:Bytes/bytes,Int:Rem>> = BinString,
+            PutInt = {bs_put_integer,Fail,{integer,Rem},1,
+                      {field_flags,[unsigned,big]},{integer,Int}},
+            Is = [PutInt|undo_renames(Is0)],
+            case Binary of
+                <<>> ->
+                    Is;
+                _ ->
+                    [{bs_put_string,byte_size(Binary),
+                      {string,Binary}}|Is]
+            end
+    end;
 undo_renames([I|Is]) ->
     [undo_rename(I)|undo_renames(Is)];
 undo_renames([]) -> [].
@@ -79,8 +104,6 @@ undo_rename({bs_put,F,{I,U,Fl},[Sz,Src]}) ->
     {I,F,Sz,U,Fl,Src};
 undo_rename({bs_put,F,{I,Fl},[Src]}) ->
     {I,F,Fl,Src};
-undo_rename({bs_put,{f,0},{bs_put_string,_,_}=I,[]}) ->
-    I;
 undo_rename({bif,bs_add=I,F,[Src1,Src2,{integer,U}],Dst}) ->
     {I,F,[Src1,Src2,U],Dst};
 undo_rename({bif,bs_utf8_size=I,F,[Src],Dst}) ->
@@ -101,7 +124,7 @@ undo_rename({test,bs_match_string=Op,F,[Ctx,Bin0]}) ->
 	      0 -> Bin0;
 	      Rem -> <<Bin0/bitstring,0:(8-Rem)>>
 	  end,
-    {test,Op,F,[Ctx,Bits,{string,binary_to_list(Bin)}]};
+    {test,Op,F,[Ctx,Bits,{string,Bin}]};
 undo_rename({put_map,Fail,assoc,S,D,R,L}) ->
     {put_map_assoc,Fail,S,D,R,L};
 undo_rename({put_map,Fail,exact,S,D,R,L}) ->
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 14c8c5b4ab..73c66e6efc 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -855,8 +855,6 @@ asm_passes() ->
 	 {iff,dblk,{listing,"block"}},
 	 {unless,no_except,{pass,beam_except}},
 	 {iff,dexcept,{listing,"except"}},
-	 {unless,no_bs_opt,{pass,beam_bs}},
-	 {iff,dbs,{listing,"bs"}},
 	 {unless,no_jopt,{pass,beam_jump}},
 	 {iff,djmp,{listing,"jump"}},
 	 {unless,no_peep_opt,{pass,beam_peep}},
@@ -2084,7 +2082,6 @@ pre_load() ->
     L = [beam_a,
 	 beam_asm,
 	 beam_block,
-	 beam_bs,
 	 beam_clean,
 	 beam_dict,
 	 beam_except,
diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src
index 1472e3fde1..108a0ca100 100644
--- a/lib/compiler/src/compiler.app.src
+++ b/lib/compiler/src/compiler.app.src
@@ -24,7 +24,6 @@
 	     beam_a,
 	     beam_asm,
 	     beam_block,
-	     beam_bs,
 	     beam_clean,
 	     beam_dict,
 	     beam_disasm,
diff --git a/lib/compiler/src/erl_bifs.erl b/lib/compiler/src/erl_bifs.erl
index ce9762899e..d925decce6 100644
--- a/lib/compiler/src/erl_bifs.erl
+++ b/lib/compiler/src/erl_bifs.erl
@@ -195,6 +195,7 @@ is_safe(erlang, is_float, 1) -> true;
 is_safe(erlang, is_function, 1) -> true;
 is_safe(erlang, is_integer, 1) -> true;
 is_safe(erlang, is_list, 1) -> true;
+is_safe(erlang, is_map, 1) -> true;
 is_safe(erlang, is_number, 1) -> true;
 is_safe(erlang, is_pid, 1) -> true;
 is_safe(erlang, is_port, 1) -> true;
diff --git a/lib/compiler/src/sys_core_inline.erl b/lib/compiler/src/sys_core_inline.erl
index 5a6cc45e4a..3380e3f1bd 100644
--- a/lib/compiler/src/sys_core_inline.erl
+++ b/lib/compiler/src/sys_core_inline.erl
@@ -195,6 +195,9 @@ kill_id_anns(Body) ->
     cerl_trees:map(fun(#c_fun{anno=A0}=CFun) ->
 			   A = kill_id_anns_1(A0),
 			   CFun#c_fun{anno=A};
+                      (#c_var{anno=A0}=Var) ->
+			   A = kill_id_anns_1(A0),
+			   Var#c_var{anno=A};
 		      (Expr) ->
 			   %% Mark everything as compiler generated to
 			   %% suppress bogus warnings.