aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/ops.tab
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/beam/ops.tab')
-rw-r--r--erts/emulator/beam/ops.tab687
1 files changed, 442 insertions, 245 deletions
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index e76d896ffc..10ca74cd60 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -74,23 +74,19 @@ trace_jump W
return
+# To ensure that a "move Src x(0)" instruction can be combined with
+# the following call instruction, we need to make sure that there is
+# no line/1 instruction between the move and the call.
#
-# To ensure that a "move Src x(0)" instruction can be combined
-# with the following call instruction, we need to make sure that
-# there is no line/1 instruction between the move and the call.
-#
-# A tail-recursive call to an external function (non-BIF) will
-# never be saved on the stack, so there is no reason to keep
-# the line instruction. (The compiler did not remove the line
-# instruction because it cannot tell the difference between
-# BIFs and ordinary Erlang functions.)
-#
+# A tail-recursive call to an external function (BIF or non-BIF) will
+# never be saved on the stack, so there is no reason to keep the line
+# instruction.
move S X0=x==0 | line Loc | call_ext Ar Func => \
line Loc | move S X0 | call_ext Ar Func
-move S X0=x==0 | line Loc | call_ext_last Ar Func=u$is_not_bif D => \
+move S X0=x==0 | line Loc | call_ext_last Ar Func D => \
move S X0 | call_ext_last Ar Func D
-move S X0=x==0 | line Loc | call_ext_only Ar Func=u$is_not_bif => \
+move S X0=x==0 | line Loc | call_ext_only Ar Func => \
move S X0 | call_ext_only Ar Func
move S X0=x==0 | line Loc | call Ar Func => \
line Loc | move S X0 | call Ar Func
@@ -102,15 +98,18 @@ line I
allocate t t?
allocate_heap t I t?
-%cold
+# This instruction when a BIF is called tail-recursively when
+# ther is stack frame.
deallocate Q
-%hot
init y
allocate_zero t t?
allocate_heap_zero t I t?
+move Src=y Dst=x | trim N Remaining => move_trim Src Dst N
trim N Remaining => i_trim N
+
+move_trim y x t
i_trim t
test_heap I t?
@@ -118,11 +117,21 @@ test_heap I t?
allocate_heap S u==0 R => allocate S R
allocate_heap_zero S u==0 R => allocate_zero S R
-init2 y y
-init3 y y y
+init Y1 | init Y2 | init Y3 | succ(Y1,Y2) | succ(Y2,Y3) => init_seq3 Y1
+init_seq3 Y1 | init Y4 | succ3(Y1,Y4) => init_seq4 Y1
+init_seq4 Y1 | init Y5 | succ4(Y1,Y5) => init_seq5 Y1
+
+init_seq3 y
+init_seq4 y
+init_seq5 y
+
init Y1 | init Y2 | init Y3 => init3 Y1 Y2 Y3
init Y1 | init Y2 => init2 Y1 Y2
+init2 y y
+init3 y y y
+
+
# Selecting values
select_val S=aiq Fail=f Size=u Rest=* => const_select_val(S, Fail, Size, Rest)
@@ -205,14 +214,11 @@ set_tuple_element s S P
# Get tuple element
-i_get_tuple_element xy P x
-
-%cold
-i_get_tuple_element xy P y
-%hot
+i_get_tuple_element xy P xy
i_get_tuple_element2 x P x
-i_get_tuple_element2y x P y y
+i_get_tuple_element2_dst x P x x
+i_get_tuple_element2_dst x P y y
i_get_tuple_element3 x P x
@@ -258,12 +264,14 @@ system_limit j
# Move instructions.
#
-move C=cxy x==0 | jump Lbl => move_jump Lbl C
+move Src=cxy Dst=xy | jump Lbl => move_jump Lbl Src Dst
-move_jump f ncxy
+move_jump f cxy xy
+move_jump f c r
-# Movement to and from the stack is common
-# Try to pack as much as we can into one instruction
+
+# Movement to and from the stack is common.
+# Try to pack as much as we can into one instruction.
# Window move
move_window/5
@@ -274,6 +282,9 @@ move_window/6
move X1=x Y1=y | move X2=x Y2=y | move X3=x Y3=y | succ(Y1,Y2) | succ(Y2,Y3) => \
move_window X1 X2 X3 Y1 Y3
+move X1=x Y1=y | move X2=x Y2=y | succ(Y1,Y2) => \
+ move_window2 X1 X2 Y1
+
move_window X1=x X2=x X3=x Y1=y Y3=y | move X4=x Y4=y | succ(Y3,Y4) => \
move_window X1 X2 X3 X4 Y1 Y4
@@ -283,15 +294,54 @@ move_window X1=x X2=x X3=x X4=x Y1=y Y4=y | move X5=x Y5=y | succ(Y4,Y5) => \
move_window X1=x X2=x X3=x Y1=y Y3=y => move_window3 X1 X2 X3 Y1
move_window X1=x X2=x X3=x X4=x Y1=y Y4=y => move_window4 X1 X2 X3 X4 Y1
+move_window2 x x y
move_window3 x x x y
move_window4 x x x x y
move_window5 x x x x x y
+# y -> x
+
+move_src_window/4
+move_src_window/5
+
+move Y1=y X1=x | move Y2=y X2=x | succ(Y1, Y2) => \
+ move_src_window Y1 Y2 X1 X2
+
+move_src_window Y1 Y2 X1 X2 | move Y3=y X3=x | succ(Y2, Y3) => \
+ move_src_window Y1 Y3 X1 X2 X3
+move_src_window Y1 Y2 X1 X2 | move Y3=y X3=x | move Y4=y X4=x | succ(Y3, Y4) => \
+ move_src_window2 Y1 X1 X2 | move_src_window Y3 Y4 X3 X4
+move_src_window Y1 Y2 X1 X2 | move Y3=y X3=x => \
+ move3 Y1 X1 Y2 X2 Y3 X3
+
+move_src_window Y1 Y3 X1 X2 X3 | move Y4=y X4=x | succ(Y3, Y4) => \
+ move_src_window4 Y1 X1 X2 X3 X4
+
+move_src_window Y1 y X1 X2 => move_src_window2 Y1 X1 X2
+move_src_window Y1 y X1 X2 X3 => move_src_window3 Y1 X1 X2 X3
+
+move_src_window2 y x x
+move_src_window3 y x x x
+move_src_window4 y x x x x
+
# Swap registers.
-move R1=x Tmp=x | move R2=xy R1 | move Tmp R2 => swap_temp R1 R2 Tmp
+move R1=xy Tmp=x | move R2=xy R1 | move Tmp R2 => swap_temp R1 R2 Tmp
+
+# The compiler uses x(1022) when swapping registers. It will definitely
+# not be used again.
+swap_temp R1 R2 Tmp=x==1022 => swap R1 R2
+
+swap_temp R1 R2 Tmp | move Src Tmp => swap R1 R2 | move Src Tmp
swap_temp R1 R2 Tmp | line Loc | apply Live | is_killed_apply(Tmp, Live) => \
swap R1 R2 | line Loc | apply Live
+swap_temp R1 R2 Tmp | line Loc | apply_last Live D | is_killed_apply(Tmp, Live) => \
+ swap R1 R2 | line Loc | apply_last Live D
+
+swap_temp R1 R2 Tmp | line Loc | call_fun Live | is_killed_by_call_fun(Tmp, Live) => \
+ swap R1 R2 | line Loc | call_fun Live
+swap_temp R1 R2 Tmp | make_fun2 OldIndex=u | is_killed_by_make_fun(Tmp, OldIndex) => \
+ swap R1 R2 | make_fun2 OldIndex
swap_temp R1 R2 Tmp | line Loc | call Live Addr | is_killed(Tmp, Live) => \
swap R1 R2 | line Loc | call Live Addr
@@ -307,84 +357,112 @@ swap_temp R1 R2 Tmp | line Loc | call_ext_only Live Addr | \
swap_temp R1 R2 Tmp | line Loc | call_ext_last Live Addr D | \
is_killed(Tmp, Live) => swap R1 R2 | line Loc | call_ext_last Live Addr D
-swap_temp x xy x
+swap_temp R1 R2 Tmp | call_ext Live Addr | is_killed(Tmp, Live) => \
+ swap R1 R2 | call_ext Live Addr
+swap_temp R1 R2 Tmp | call_ext_only Live Addr | is_killed(Tmp, Live) => \
+ swap R1 R2 | call_ext_only Live Addr
+swap_temp R1 R2 Tmp | call_ext_last Live Addr D | is_killed(Tmp, Live) => \
+ swap R1 R2 | call_ext_last Live Addr D
+
+swap_temp R1 R2 Tmp | move Src Any | line Loc | call Live Addr | \
+ is_killed(Tmp, Live) | distinct(Tmp, Src) => \
+ swap R1 R2 | move Src Any | line Loc | call Live Addr
+swap_temp R1 R2 Tmp | move Src Any | line Loc | call_ext Live Addr | \
+ is_killed(Tmp, Live) | distinct(Tmp, Src) => \
+ swap R1 R2 | move Src Any | line Loc | call_ext Live Addr
+swap_temp R1 R2 Tmp | move Src Any | call_only Live Addr | \
+ is_killed(Tmp, Live) | distinct(Tmp, Src) => \
+ swap R1 R2 | move Src Any | call_only Live Addr
+swap_temp R1 R2 Tmp | move Src Any | line Loc | call_ext_only Live Addr | \
+ is_killed(Tmp, Live) | distinct(Tmp, Src) => \
+ swap R1 R2 | move Src Any | line Loc | call_ext_only Live Addr
+swap_temp R1 R2 Tmp | move Src Any | line Loc | call_fun Live | \
+ is_killed(Tmp, Live) | distinct(Tmp, Src) => \
+ swap R1 R2 | move Src Any | line Loc | call_fun Live
+
+swap_temp R1 R2 Tmp | line Loc | send | is_killed_by_send(Tmp) => \
+ swap R1 R2 | line Loc | send
+
+# swap_temp/3 with Y register operands are rare.
+swap_temp R1 R2=y Tmp => swap R1 R2 | move R2 Tmp
+swap_temp R1=y R2 Tmp => swap R1 R2 | move R2 Tmp
+
+swap R1=x R2=y => swap R2 R1
+
+swap_temp x x x
+
+swap xy x
+swap y y
+
+# move_shift
+
+move SD=x D=x | move Src=cxy SD=x | distinct(D, Src) => move_shift Src SD D
+move SD=y D=x | move Src=x SD=y | distinct(D, Src) => move_shift Src SD D
+move SD=y D=x | init SD | => move_shift n SD D
+move SD=x D=y | move Src=x SD=x | distinct(D, Src) => move_shift Src SD D
+move SD=x==0 D=y | move Src=y SD=x==0 | distinct(D, Src) => move_shift Src SD D
+
+move_shift cxy x x
+move_shift nx y x
+move_shift x x y
+move_shift y r y
+
+# move2_par x x x x
-swap x xy
+move X1=x X2=x | move X3=x X4=x | independent_moves(X1, X2, X3, X4) => \
+ move2_par X1 X2 X3 X4
+move2_par x x x x
-move Src=x D1=x | move Src=x D2=x => move_dup Src D1 D2
-move Src=x SD=x | move SD=x D=x => move_dup Src SD D
-move Src=x D1=x | move Src=x D2=y => move_dup Src D1 D2
-move Src=y SD=x | move SD=x D=y => move_dup Src SD D
-move Src=x SD=x | move SD=x D=y => move_dup Src SD D
-move Src=y SD=x | move SD=x D=x => move_dup Src SD D
+# move2_par x x x y
-move SD=x D=x | move Src=xy SD=x => move_shift Src SD D
-move SD=y D=x | move Src=x SD=y => move_shift Src SD D
-move SD=x D=y | move Src=x SD=x => move_shift Src SD D
+move X1=x X2=x | move X3=x Y1=y | independent_moves(X1, X2, X3, Y1) => \
+ move2_par X1 X2 X3 Y1
+move X3=x Y1=y | move X1=x X2=x | independent_moves(X3, Y1, X1, X2) => \
+ move2_par X1 X2 X3 Y1
+move2_par x x x y
-# The transformations above guarantee that the source for
-# the second move is not the same as the destination for
-# the first move. That means that we can do the moves in
-# parallel (fetch both values, then store them) which could
-# be faster.
+# move2_par y x y x
-move X1=x Y1=y | move X2=x Y2=y => move2_par X1 Y1 X2 Y2
move Y1=y X1=x | move Y2=y X2=x => move2_par Y1 X1 Y2 X2
+move2_par y x y x
-move X1=x X2=x | move X3=x X4=x => move2_par X1 X2 X3 X4
+# move2_par y x x y
-move X1=x X2=x | move X3=x Y1=y => move2_par X1 X2 X3 Y1
+move S1=y S2=x | move X1=x Y1=y | independent_moves(S1, S2, X1, Y1) => \
+ move2_par S1 S2 X1 Y1
+move X1=x Y1=y | move S1=y S2=x | independent_moves(S1, S2, X1, Y1) => \
+ move2_par S1 S2 X1 Y1
+move2_par y x x y
-move S1=x S2=x | move X1=x Y1=y => move2_par S1 S2 X1 Y1
+# move2_par y x x x
-move S1=y S2=x | move X1=x Y1=y => move2_par S1 S2 X1 Y1
+move Y1=y X1=x | move S1=x D1=x | independent_moves(Y1, X1, S1, D1) => \
+ move2_par Y1 X1 S1 D1
+move S1=x D1=x | move Y1=y X1=x | independent_moves(Y1, X1, S1, D1) => \
+ move2_par Y1 X1 S1 D1
+move2_par y x x x
-move Y1=y X1=x | move S1=x D1=x => move2_par Y1 X1 S1 D1
-move S1=x D1=x | move Y1=y X1=x => move2_par S1 D1 Y1 X1
+# move3
-move2_par X1=x Y1=y X2=x Y2=y | move X3=x Y3=y => move3 X1 Y1 X2 Y2 X3 Y3
move2_par Y1=y X1=x Y2=y X2=x | move Y3=y X3=x => move3 Y1 X1 Y2 X2 Y3 X3
move2_par X1=x X2=x X3=x X4=x | move X5=x X6=x => move3 X1 X2 X3 X4 X5 X6
+move3 y x y x y x
+move3 x x x x x x
+
+# move_x1, move_x2
+
move C=aiq X=x==1 => move_x1 C
move C=aiq X=x==2 => move_x2 C
+move n D=y => init D
+
move_x1 c
move_x2 c
-move_shift x x x
-move_shift y x x
-move_shift x y x
-move_shift x x y
-
-move_dup xy x xy
-
-move2_par x y x y
-move2_par y x y x
-move2_par x x x x
-
-move2_par x x x y
-
-move2_par y x x y
-
-move2_par x x y x
-move2_par y x x x
-
-move3 x y x y x y
-move3 y x y x y x
-move3 x x x x x x
-
-# The compiler almost never generates a "move Literal y(Y)" instruction,
-# so let's cheat if we encounter one.
-move S=n D=y => init D
-move S=c D=y => move S x | move x D
-
-move x x
-move x y
-move y x
-move c x
+move xy xy
+move c xy
move n x
-move y y
# The following move instructions using x(0) are frequently used.
@@ -478,14 +556,25 @@ is_ge f? c x
is_ge f? s s
%hot
-is_eq f? s s
+is_eq Fail=f Const=c Reg=xy => is_eq Fail Reg Const
+is_eq Fail=f C1=c C2=c => move C1 x | is_eq Fail x C2
+is_eq f? S s
-is_ne f? s s
+is_ne Fail=f Const=c Reg=xy => is_ne Fail Reg Const
+is_ne Fail=f C1=c C2=c => move C1 x | is_ne Fail x C2
+is_ne f? S s
#
-# Putting things.
+# Putting tuples.
+#
+# Code compiled with OTP 22 and later uses put_tuple2 to
+# to construct a tuple.
+#
+# Code compiled before OTP 22 uses put_tuple + one put instruction
+# per element. Translate to put_tuple2.
#
+i_put_tuple/2
put_tuple Arity Dst => i_put_tuple Dst u
i_put_tuple Dst Arity Puts=* | put S1 | put S2 | \
@@ -495,11 +584,13 @@ i_put_tuple Dst Arity Puts=* | put S1 | put S2 | \
i_put_tuple Dst Arity Puts=* | put S => \
tuple_append_put(Arity, Dst, Puts, S)
-i_put_tuple/2
+i_put_tuple Dst Arity Puts=* => put_tuple2 Dst Arity Puts
-i_put_tuple xy I
+put_tuple2 xy I
#
+# Putting lists.
+#
# The instruction "put_list Const [] Dst" were generated in rare
# circumstances up to and including OTP 18. Starting with OTP 19,
# AFAIK, it should never be generated.
@@ -510,32 +601,26 @@ put_list Src Dst=x Dst => update_list Src Dst
update_list xyc x
-put_list x n x
-put_list y n x
-put_list x x x
-put_list y x x
+# put_list SrcReg1 SrcReg2 => Dst
+
+put_list xy xy x
-put_list y y x
-put_list x y x
+# put_list SrcReg [] => Dst
-# put_list SrcReg Constant Dst
+put_list xy n xy
-put_list x c x
-put_list x c y
+# put_list SrcReg Constant => x
-put_list y c x
+put_list xy c x
-# put_list Constant SrcReg Dst
+# put_list Constant SrcReg => Dst
-put_list c x x
-put_list c y x
+put_list c xy x
# The following put_list instructions using x(0) are frequently used.
-put_list r n r
-put_list r n x
-put_list r x x
-put_list r x r
+put_list r n rx
+put_list r x rx
put_list x x r
%cold
@@ -602,9 +687,18 @@ is_tuple f? rxy
test_arity Fail Literal=q Arity => move Literal x | test_arity Fail x Arity
test_arity Fail=f c Arity => jump Fail
+test_arity Fail Tuple=x Arity | get_tuple_element Tuple Pos Dst=x => \
+ test_arity_get_tuple_element Fail Tuple Arity Pos Dst
test_arity f? xy A
+test_arity_get_tuple_element f? x A P x
+
+is_tuple NotTupleFail Tuple=x | is_tagged_tuple WrongRecordFail Tuple Arity Atom => \
+ is_tagged_tuple_ff NotTupleFail WrongRecordFail Tuple Arity Atom
+
+is_tagged_tuple_ff f? f? rx A a
+
get_tuple_element Reg=x P1 D1=x | get_tuple_element Reg=x P2 D2=x | \
get_tuple_element Reg=x P3 D3=x | \
succ(P1, P2) | succ(P2, P3) | \
@@ -613,8 +707,11 @@ get_tuple_element Reg=x P1 D1=x | get_tuple_element Reg=x P2 D2=x | \
get_tuple_element Reg=x P1 D1=x | get_tuple_element Reg=x P2 D2=x | \
succ(P1, P2) | succ(D1, D2) => i_get_tuple_element2 Reg P1 D1
+get_tuple_element Reg=x P1 D1=x | get_tuple_element Reg=x P2 D2=x | \
+ succ(P1, P2) | distinct(D1, Reg) => i_get_tuple_element2_dst Reg P1 D1 D2
+
get_tuple_element Reg=x P1 D1=y | get_tuple_element Reg=x P2 D2=y | \
- succ(P1, P2) => i_get_tuple_element2y Reg P1 D1 D2
+ succ(P1, P2) => i_get_tuple_element2_dst Reg P1 D1 D2
get_tuple_element Reg P Dst => i_get_tuple_element Reg P Dst
@@ -638,14 +735,21 @@ is_list f? y
is_nonempty_list Fail=f S=x | allocate Need Rs => is_nonempty_list_allocate Fail S Need Rs
-is_nonempty_list F=f x==0 | test_heap I1 I2 => is_nonempty_list_test_heap F I1 I2
-
is_nonempty_list Fail=f S=x | get_list S D1=x D2=x => \
is_nonempty_list_get_list Fail S D1 D2
+is_nonempty_list Fail=f S=x | get_hd S Dst=x => \
+ is_nonempty_list_get_hd Fail S Dst
+
+is_nonempty_list Fail=f S=x | get_tl S Dst=x => \
+ is_nonempty_list_get_tl Fail S Dst
+
is_nonempty_list_allocate f? rx t t
-is_nonempty_list_test_heap f? I t
+
is_nonempty_list_get_list f? rx x x
+is_nonempty_list_get_hd f? x x
+is_nonempty_list_get_tl f? x x
+
is_nonempty_list f? xy
is_atom f? x
@@ -710,11 +814,12 @@ is_boolean Fail=f ac => jump Fail
is_boolean f? xy
%hot
-is_function2 Fail=f Literal=q Arity | literal_is_export(Literal) =>
-is_function2 Fail=f c Arity => jump Fail
-is_function2 Fail=f Fun a => jump Fail
+is_function2 Fail=f Fun Arity => gen_is_function2(Fail, Fun, Arity)
-is_function2 f? S s
+%cold
+cold_is_function2 f? x x
+%hot
+hot_is_function2 f? S t
# Allocating & initializing.
allocate Need Regs | init Y => allocate_init Need Regs Y
@@ -946,10 +1051,9 @@ call_ext_only u==0 u$func:os:perf_counter/0 => \
call_ext u Bif=u$is_bif => call_bif Bif
-call_ext_last u Bif=u$is_bif D => call_bif Bif | deallocate_return D
+call_ext_last u Bif=u$is_bif D => deallocate D | call_bif_only Bif
-call_ext_only Ar=u Bif=u$is_bif => \
- allocate u Ar | call_bif Bif | deallocate_return u
+call_ext_only Ar=u Bif=u$is_bif => call_bif_only Bif
#
# Any remaining calls are calls to Erlang functions, not BIFs.
@@ -981,6 +1085,7 @@ i_perf_counter
%hot
call_bif e
+call_bif_only e
#
# Calls to non-building and guard BIFs.
@@ -989,14 +1094,18 @@ call_bif e
bif0 u$bif:erlang:self/0 Dst=d => self Dst
bif0 u$bif:erlang:node/0 Dst=d => node Dst
+bif1 Fail=f Bif=u$bif:erlang:hd/1 Src=x Dst=x => is_nonempty_list_get_hd Fail Src Dst
+bif1 Fail=f Bif=u$bif:erlang:tl/1 Src=x Dst=x => is_nonempty_list_get_tl Fail Src Dst
+
bif1 Fail Bif=u$bif:erlang:get/1 Src=s Dst=d => gen_get(Src, Dst)
bif2 Jump=j u$bif:erlang:element/2 S1=s S2=xy Dst=d => gen_element(Jump, S1, S2, Dst)
-bif1 p Bif S1 Dst => bif1_body Bif S1 Dst
+bif1 p Bif S1 Dst => i_bif1_body S1 Bif Dst
+bif1 Fail=f Bif S1 Dst => i_bif1 S1 Fail Bif Dst
-bif2 p Bif S1 S2 Dst => i_bif2_body Bif S1 S2 Dst
-bif2 Fail Bif S1 S2 Dst => i_bif2 Fail Bif S1 S2 Dst
+bif2 p Bif S1 S2 Dst => i_bif2_body S2 S1 Bif Dst
+bif2 Fail=f Bif S1 S2 Dst => i_bif2 S2 S1 Fail Bif Dst
i_get_hash c I d
i_get s d
@@ -1014,10 +1123,12 @@ i_fast_element xy j? I d
i_element xy j? s d
-bif1 f? b s d
-bif1_body b s d
-i_bif2 f? b s s d
-i_bif2_body b s s d
+i_bif1 s f? b d
+i_bif1_body s b d
+i_bif2 s s f? b d
+i_bif2_body s s b d
+i_bif3 s s s f? b d
+i_bif3_body s s s b d
#
# Internal calls.
@@ -1062,8 +1173,25 @@ call_fun Arity => i_call_fun Arity
i_call_fun t
i_call_fun_last t Q
+
+#
+# A fun with an empty environment can be converted to a literal.
+# As a further optimization, the we try to move the fun to its
+# final destination directly.
+
make_fun2 OldIndex=u => gen_make_fun2(OldIndex)
+move_fun/2
+move_fun Fun X0 | move X0 Dst | move Src X0 => move Fun Dst | move Src X0
+move_fun Fun X0 | move A B | move X0 Dst | move Src X0 | \
+ independent_moves(Fun, X0, A, B) | distinct(Dst, A) => \
+ move Fun Dst | move A B | move Src X0
+move_fun Fun X0 | move X0 Dst | make_fun2 OldIndex | \
+ is_killed_by_make_fun(X0, OldIndex)=> \
+ move Fun Dst | make_fun2 OldIndex
+
+move_fun Fun Dst => move Fun Dst
+
%cold
i_make_fun W t
%hot
@@ -1074,101 +1202,141 @@ is_function Fail=f c => jump Fail
func_info M F A => i_func_info u M F A
# ================================================================
-# New bit syntax matching (R11B).
+# Bit syntax matching obsoleted in OTP 22.
# ================================================================
-%warm
+%cold
bs_start_match2 Fail=f ica X Y D => jump Fail
bs_start_match2 Fail Bin X Y D => i_bs_start_match2 Bin Fail X Y D
-i_bs_start_match2 xy f t t x
+i_bs_start_match2 xy f t t d
+bs_save2 Y=y Index => move Y x | bs_save2 x Index
bs_save2 Reg Index => gen_bs_save(Reg, Index)
i_bs_save2 x t
+bs_restore2 Y=y Index => move Y x | bs_restore2 x Index
bs_restore2 Reg Index => gen_bs_restore(Reg, Index)
i_bs_restore2 x t
+bs_context_to_binary Y=y | line L | badmatch Y => \
+ move Y x | bs_context_to_binary x | line L | badmatch x
+bs_context_to_binary Y=y => move Y x | bs_context_to_binary x
+bs_context_to_binary x
+%warm
+
+# ================================================================
+# New bit syntax matching (R11B).
+# ================================================================
+
+%warm
+
# Matching integers
bs_match_string Fail Ms Bits Val => i_bs_match_string Ms Fail Bits Val
-i_bs_match_string x f W W
+i_bs_match_string xy f W W
# Fetching integers from binaries.
-bs_get_integer2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \
+bs_get_integer2 Fail=f Ms=xy Live=u Sz=sq Unit=u Flags=u Dst=d => \
gen_get_integer2(Fail, Ms, Live, Sz, Unit, Flags, Dst)
-i_bs_get_integer_small_imm x W f? t x
-i_bs_get_integer_imm x W t f? t x
-i_bs_get_integer f? t t x s x
-i_bs_get_integer_8 x f? x
-i_bs_get_integer_16 x f? x
+i_bs_get_integer_small_imm Ms Bits Fail Flags Y=y => \
+ i_bs_get_integer_small_imm Ms Bits Fail Flags x | move x Y
+
+i_bs_get_integer_imm Ms Bits Live Fail Flags Y=y => \
+ i_bs_get_integer_imm Ms Bits Live Fail Flags x | move x Y
+
+i_bs_get_integer_small_imm xy W f? t x
+i_bs_get_integer_imm xy W t f? t x
+i_bs_get_integer xy f? t t s d
+i_bs_get_integer_8 xy f? d
+i_bs_get_integer_16 xy f? d
%if ARCH_64
-i_bs_get_integer_32 x f? x
+i_bs_get_integer_32 xy f? d
%endif
# Fetching binaries from binaries.
-bs_get_binary2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \
+bs_get_binary2 Fail=f Ms=xy Live=u Sz=sq Unit=u Flags=u Dst=d => \
gen_get_binary2(Fail, Ms, Live, Sz, Unit, Flags, Dst)
-i_bs_get_binary_imm2 f? x t W t x
-i_bs_get_binary2 f x t? s t x
-i_bs_get_binary_all2 f? x t t x
-i_bs_get_binary_all_reuse x f? t
+i_bs_get_binary_imm2 xy f? t W t d
+i_bs_get_binary2 xy f t? s t d
+i_bs_get_binary_all2 xy f? t t d
# Fetching float from binaries.
-bs_get_float2 Fail=f Ms=x Live=u Sz=s Unit=u Flags=u Dst=d => \
+bs_get_float2 Fail=f Ms=xy Live=u Sz=s Unit=u Flags=u Dst=d => \
gen_get_float2(Fail, Ms, Live, Sz, Unit, Flags, Dst)
bs_get_float2 Fail=f Ms=x Live=u Sz=q Unit=u Flags=u Dst=d => jump Fail
-i_bs_get_float2 f? x t s t x
+i_bs_get_float2 xy f? t s t d
# Miscellanous
-bs_skip_bits2 Fail=f Ms=x Sz=sq Unit=u Flags=u => \
+bs_skip_bits2 Fail=f Ms=xy Sz=sq Unit=u Flags=u => \
gen_skip_bits2(Fail, Ms, Sz, Unit, Flags)
-i_bs_skip_bits_imm2 f? x W
-i_bs_skip_bits2 f? x xy t
-i_bs_skip_bits_all2 f? x t
+i_bs_skip_bits_imm2 f? xy W
+i_bs_skip_bits2 xy xy f? t
-bs_test_tail2 Fail=f Ms=x Bits=u==0 => bs_test_zero_tail2 Fail Ms
-bs_test_tail2 Fail=f Ms=x Bits=u => bs_test_tail_imm2 Fail Ms Bits
-bs_test_zero_tail2 f? x
-bs_test_tail_imm2 f? x W
+bs_test_tail2 Fail=f Ms=xy Bits=u==0 => bs_test_zero_tail2 Fail Ms
+bs_test_tail2 Fail=f Ms=xy Bits=u => bs_test_tail_imm2 Fail Ms Bits
+bs_test_zero_tail2 f? xy
+bs_test_tail_imm2 f? xy W
bs_test_unit F Ms Unit=u==8 => bs_test_unit8 F Ms
-bs_test_unit f? x t
-bs_test_unit8 f? x
+bs_test_unit f? xy t
+bs_test_unit8 f? xy
-# An y register operand for bs_context_to_binary is rare,
-# but can happen because of inlining.
+# Gets a bitstring from the tail of a context.
+bs_get_tail xy d t
-bs_context_to_binary Y=y | line L | badmatch Y => \
- move Y x | bs_context_to_binary x | line L | badmatch x
+# New bs_start_match variant for contexts with external position storage.
+#
+# bs_get/set_position is used to save positions into registers instead of
+# "slots" in the context itself, which lets us continue matching even after
+# we've passed it off to another function.
-bs_context_to_binary Y=y => move Y x | bs_context_to_binary x
+%if ARCH_64
+bs_start_match3 Fail Bin Live Ctx | bs_get_position Ctx Pos=x Ignored => \
+ i_bs_start_match3_gp Bin Live Fail Ctx Pos
+i_bs_start_match3_gp xy t f d x
+%endif
-bs_context_to_binary x
+bs_start_match3 Fail=f ica Live Dst => jump Fail
+bs_start_match3 Fail Bin Live Dst => i_bs_start_match3 Bin Live Fail Dst
+
+i_bs_start_match3 xy t f d
+
+# Match context position instructions. 64-bit assumes that all positions can
+# fit into an unsigned small.
+
+%if ARCH_64
+ bs_get_position Src Dst Live => i_bs_get_position Src Dst
+ i_bs_get_position xy xy
+ bs_set_position xy xy
+%else
+ bs_get_position xy d t?
+ bs_set_position xy xy
+%endif
#
# Utf8/utf16/utf32 support. (R12B-5)
#
-bs_get_utf8 Fail=f Ms=x u u Dst=d => i_bs_get_utf8 Ms Fail Dst
-i_bs_get_utf8 x f? x
+bs_get_utf8 Fail=f Ms=xy u u Dst=d => i_bs_get_utf8 Ms Fail Dst
+i_bs_get_utf8 xy f? d
-bs_skip_utf8 Fail=f Ms=x u u => i_bs_get_utf8 Ms Fail x
+bs_skip_utf8 Fail=f Ms=xy u u => i_bs_get_utf8 Ms Fail x
-bs_get_utf16 Fail=f Ms=x u Flags=u Dst=d => i_bs_get_utf16 Ms Fail Flags Dst
-bs_skip_utf16 Fail=f Ms=x u Flags=u => i_bs_get_utf16 Ms Fail Flags x
+bs_get_utf16 Fail=f Ms=xy u Flags=u Dst=d => i_bs_get_utf16 Ms Fail Flags Dst
+bs_skip_utf16 Fail=f Ms=xy u Flags=u => i_bs_get_utf16 Ms Fail Flags x
-i_bs_get_utf16 x f? t x
+i_bs_get_utf16 xy f? t d
-bs_get_utf32 Fail=f Ms=x Live=u Flags=u Dst=d => \
+bs_get_utf32 Fail=f Ms=xy Live=u Flags=u Dst=d => \
bs_get_integer2 Fail Ms Live i=32 u=1 Flags Dst | \
i_bs_validate_unicode_retract Fail Dst Ms
-bs_skip_utf32 Fail=f Ms=x Live=u Flags=u => \
+bs_skip_utf32 Fail=f Ms=xy Live=u Flags=u => \
bs_get_integer2 Fail Ms Live i=32 u=1 Flags x | \
i_bs_validate_unicode_retract Fail x Ms
@@ -1182,6 +1350,9 @@ i_bs_validate_unicode_retract j s S
bs_init2 Fail Sz Words Regs Flags Dst | binary_too_big(Sz) => system_limit Fail
+bs_init2 Fail Sz Words Regs Flags Dst=y => \
+ bs_init2 Fail Sz Words Regs Flags x | move x Dst
+
bs_init2 Fail Sz=u Words=u==0 Regs Flags Dst => i_bs_init Sz Regs Dst
bs_init2 Fail Sz=u Words Regs Flags Dst => \
@@ -1202,6 +1373,8 @@ i_bs_init_heap W I t? x
bs_init_bits Fail Sz=o Words Regs Flags Dst => system_limit Fail
+bs_init_bits Fail Sz Words Regs Flags Dst=y => \
+ bs_init_bits Fail Sz Words Regs Flags x | move x Dst
bs_init_bits Fail Sz=u Words=u==0 Regs Flags Dst => i_bs_init_bits Sz Regs Dst
bs_init_bits Fail Sz=u Words Regs Flags Dst => i_bs_init_bits_heap Sz Words Regs Dst
@@ -1230,7 +1403,7 @@ bs_private_append Fail Size Unit Bin Flags Dst => \
bs_init_writable
-i_bs_append j? I t? t s x
+i_bs_append j? I t? t s xy
i_bs_private_append j? t s S x
#
@@ -1240,31 +1413,35 @@ i_bs_private_append j? t s S x
bs_put_integer Fail=j Sz=sq Unit=u Flags=u Src=s => \
gen_put_integer(Fail, Sz, Unit, Flags, Src)
-i_new_bs_put_integer j? s t s
-i_new_bs_put_integer_imm j? W t s
+i_new_bs_put_integer j? S t s
+i_new_bs_put_integer_imm xyc j? W t
#
# Utf8/utf16/utf32 support. (R12B-5)
#
-bs_utf8_size j Src=s Dst=d => i_bs_utf8_size Src Dst
+bs_utf8_size j Src Dst=d => i_bs_utf8_size Src Dst
+bs_utf16_size j Src Dst=d => i_bs_utf16_size Src Dst
-i_bs_utf8_size s x
+bs_put_utf8 Fail u Src => i_bs_put_utf8 Fail Src
-bs_utf16_size j Src=s Dst=d => i_bs_utf16_size Src Dst
-
-i_bs_utf16_size s x
-
-bs_put_utf8 Fail u Src=s => i_bs_put_utf8 Fail Src
+bs_put_utf32 Fail=j Flags=u Src=s => \
+ i_bs_validate_unicode Fail Src | bs_put_integer Fail i=32 u=1 Flags Src
-i_bs_put_utf8 j? s
+i_bs_utf8_size S x
+i_bs_utf16_size S x
-bs_put_utf16 j? t s
+i_bs_put_utf8 j? S
+bs_put_utf16 j? t S
-bs_put_utf32 Fail=j Flags=u Src=s => \
- i_bs_validate_unicode Fail Src | bs_put_integer Fail i=32 u=1 Flags Src
+i_bs_validate_unicode j? S
-i_bs_validate_unicode j? s
+# Handle unoptimized code.
+i_bs_utf8_size Src=c Dst => move Src x | i_bs_utf8_size x Dst
+i_bs_utf16_size Src=c Dst => move Src x | i_bs_utf16_size x Dst
+i_bs_put_utf8 Fail Src=c => move Src x | i_bs_put_utf8 Fail x
+bs_put_utf16 Fail Flags Src=c => move Src x | bs_put_utf16 Fail Flags x
+i_bs_validate_unicode Fail Src=c => move Src x | i_bs_validate_unicode Fail x
#
# Storing floats into binaries.
@@ -1274,7 +1451,7 @@ bs_put_float Fail Sz=q Unit Flags Val => badarg Fail
bs_put_float Fail=j Sz=s Unit=u Flags=u Src=s => \
gen_put_float(Fail, Sz, Unit, Flags, Src)
-i_new_bs_put_float j? s t s
+i_new_bs_put_float j? S t s
i_new_bs_put_float_imm j? W t s
#
@@ -1284,9 +1461,18 @@ i_new_bs_put_float_imm j? W t s
bs_put_binary Fail=j Sz=s Unit=u Flags=u Src=s => \
gen_put_binary(Fail, Sz, Unit, Flags, Src)
-i_new_bs_put_binary j? s t s
-i_new_bs_put_binary_imm j? W s
-i_new_bs_put_binary_all j? s t
+# In unoptimized code, the binary argument could be a literal. (In optimized code,
+# there would be a bs_put_string instruction.)
+i_new_bs_put_binary Fail Size Unit Lit=c => \
+ move Lit x | i_new_bs_put_binary Fail Size Unit x
+i_new_bs_put_binary_imm Fail Size Lit=c => \
+ move Lit x | i_new_bs_put_binary_imm Fail Size x
+i_new_bs_put_binary_all Lit=c Fail Unit => \
+ move Lit x | i_new_bs_put_binary_all x Fail Unit
+
+i_new_bs_put_binary j? S t S
+i_new_bs_put_binary_imm j? W S
+i_new_bs_put_binary_all xy j? t
#
# Warning: The i_bs_put_string and i_new_bs_put_string instructions
@@ -1384,23 +1570,22 @@ put_map_exact F Map Dst Live Size Rest=* | map_key_sort(Size, Rest) => \
sorted_put_map_assoc Map Dst Live Size Rest=* | is_empty_map(Map) => \
new_map Dst Live Size Rest
-sorted_put_map_assoc Src=s Dst Live Size Rest=* => \
- update_map_assoc Src Dst Live Size Rest
-sorted_put_map_assoc Src Dst Live Size Rest=* => \
- move Src x | update_map_assoc x Dst Live Size Rest
+sorted_put_map_assoc Src=xyc Dst Live Size Rest=* => \
+ update_map_assoc Src Dst Live Size Rest
-sorted_put_map_exact F Src=s Dst Live Size Rest=* => \
- update_map_exact F Src Dst Live Size Rest
-sorted_put_map_exact F Src Dst Live Size Rest=* => \
- move Src x | update_map_exact F x Dst Live Size Rest
+sorted_put_map_exact Fail Src=xy Dst Live Size Rest=* => \
+ update_map_exact Src Fail Dst Live Size Rest
+# Literal map arguments for an exact update operation are extremely rare.
+sorted_put_map_exact Fail Src Dst Live Size Rest=* => \
+ move Src x | update_map_exact x Fail Dst Live Size Rest
new_map Dst Live Size Rest=* | is_small_map_literal_keys(Size, Rest) => \
gen_new_small_map_lit(Dst, Live, Size, Rest)
new_map d t I
i_new_small_map_lit d t q
-update_map_assoc s d t I
-update_map_exact j? s d t I
+update_map_assoc xyc d t I
+update_map_exact xy j? d t I
is_map Fail Lit=q | literal_is_map(Lit) =>
is_map Fail cq => jump Fail
@@ -1447,80 +1632,93 @@ gc_bif2 Fail Live u$bif:erlang:sminus/2 S1 S2 Dst => \
#
# Optimize addition and subtraction of small literals using
-# the i_increment/4 instruction (in bodies, not in guards).
+# the i_increment/3 instruction (in bodies, not in guards).
#
gen_plus p Live Int=i Reg=d Dst => \
- gen_increment(Reg, Int, Live, Dst)
+ gen_increment(Reg, Int, Dst)
gen_plus p Live Reg=d Int=i Dst => \
- gen_increment(Reg, Int, Live, Dst)
+ gen_increment(Reg, Int, Dst)
gen_minus p Live Reg=d Int=i Dst | negation_is_small(Int) => \
- gen_increment_from_minus(Reg, Int, Live, Dst)
+ gen_increment_from_minus(Reg, Int, Dst)
#
-# GCing arithmetic instructions.
+# Arithmetic instructions.
#
-gen_plus Fail Live S1 S2 Dst => i_plus S1 S2 Fail Live Dst
+# It is OK to swap arguments for '+' in a guard. It is also
+# OK to turn minus into plus in a guard.
+gen_plus Fail=f Live S1=c S2 Dst => i_plus S2 S1 Fail Dst
+gen_minus Fail=f Live S1 S2=i Dst => gen_plus_from_minus(Fail, Live, S1, S2, Dst)
+
+gen_plus Fail Live S1 S2 Dst => i_plus S1 S2 Fail Dst
-gen_minus Fail Live S1 S2 Dst => i_minus S1 S2 Fail Live Dst
+gen_minus Fail Live S1 S2 Dst => i_minus S1 S2 Fail Dst
gc_bif2 Fail Live u$bif:erlang:stimes/2 S1 S2 Dst => \
- i_times Fail Live S1 S2 Dst
+ i_times Fail S1 S2 Dst
gc_bif2 Fail Live u$bif:erlang:div/2 S1 S2 Dst => \
- i_m_div Fail Live S1 S2 Dst
+ i_m_div Fail S1 S2 Dst
gc_bif2 Fail Live u$bif:erlang:intdiv/2 S1 S2 Dst => \
- i_int_div Fail Live S1 S2 Dst
+ i_int_div Fail S1 S2 Dst
gc_bif2 Fail Live u$bif:erlang:rem/2 S1 S2 Dst => \
- i_rem S1 S2 Fail Live Dst
+ i_rem S1 S2 Fail Dst
gc_bif2 Fail Live u$bif:erlang:bsl/2 S1 S2 Dst => \
- i_bsl S1 S2 Fail Live Dst
+ i_bsl S1 S2 Fail Dst
gc_bif2 Fail Live u$bif:erlang:bsr/2 S1 S2 Dst => \
- i_bsr S1 S2 Fail Live Dst
+ i_bsr S1 S2 Fail Dst
gc_bif2 Fail Live u$bif:erlang:band/2 S1 S2 Dst => \
- i_band S1 S2 Fail Live Dst
+ i_band S1 S2 Fail Dst
gc_bif2 Fail Live u$bif:erlang:bor/2 S1 S2 Dst => \
- i_bor Fail Live S1 S2 Dst
+ i_bor Fail S1 S2 Dst
gc_bif2 Fail Live u$bif:erlang:bxor/2 S1 S2 Dst => \
- i_bxor Fail Live S1 S2 Dst
+ i_bxor Fail S1 S2 Dst
-gc_bif1 Fail I u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src I Dst
+gc_bif1 Fail Live u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src Dst
-i_increment rxy W t d
+i_increment rxy W d
-i_plus x xy j? t d
-i_plus s s j? t d
+# Handle unoptimized code.
+i_plus S1=c S2=c Fail Dst => move S1 x | i_plus x S2 Fail Dst
-i_minus x x j? t d
-i_minus s s j? t d
+i_plus xy xyc j? d
-i_times j? t s s d
+# A minus instruction with a constant right operand will be
+# converted to an i_increment instruction, except in guards or
+# when the negated value of the constant won't fit in a guard.
+# Therefore, it very rare.
+i_minus S1 S2=c Fail Dst => move S2 x | i_minus S1 x Fail Dst
-i_m_div j? t s s d
-i_int_div j? t s s d
+i_minus xy xy j? d
+i_minus c xy j? d
-i_rem x x j? t d
-i_rem s s j? t d
+i_times j? s s d
-i_bsl s s j? t d
-i_bsr s s j? t d
+i_m_div j? s s d
+i_int_div j? s s d
-i_band x c j? t d
-i_band s s j? t d
+i_rem x x j? d
+i_rem s s j? d
-i_bor j? I s s d
-i_bxor j? I s s d
+i_bsl s s j? d
+i_bsr s s j? d
-i_int_bnot Fail Src=c Live Dst => move Src x | i_int_bnot Fail x Live Dst
+i_band x c j? d
+i_band s s j? d
-i_int_bnot j? S t d
+i_bor j? s s d
+i_bxor j? s s d
+
+i_int_bnot Fail Src=c Dst => move Src x | i_int_bnot Fail x Dst
+
+i_int_bnot j? S d
#
# Old guard BIFs that creates heap fragments are no longer allowed.
@@ -1533,29 +1731,28 @@ bif1 Fail u$bif:erlang:round/1 s d => too_old_compiler
bif1 Fail u$bif:erlang:trunc/1 s d => too_old_compiler
#
-# Guard BIFs.
+# Handle the length/1 guard BIF specially to make it trappable.
#
-gc_bif1 Fail I Bif Src Dst => \
- gen_guard_bif1(Fail, I, Bif, Src, Dst)
-
-gc_bif2 Fail I Bif S1 S2 Dst => \
- gen_guard_bif2(Fail, I, Bif, S1, S2, Dst)
-gc_bif3 Fail I Bif S1 S2 S3 Dst => \
- gen_guard_bif3(Fail, I, Bif, S1, S2, S3, Dst)
+gc_bif1 Fail=j Live u$bif:erlang:length/1 Src Dst => \
+ i_length_setup Live Src | i_length Fail Live Dst
-i_gc_bif1 j? W s t? d
+i_length_setup Live Src=c => move Src x | i_length_setup Live x
-i_gc_bif2 j? W t? s s d
+i_length_setup t xy
+i_length j? t d
-ii_gc_bif3/7
+#
+# Guard BIFs.
+#
+gc_bif1 p Live Bif Src Dst => i_bif1_body Src Bif Dst
+gc_bif1 Fail=f Live Bif Src Dst => i_bif1 Src Fail Bif Dst
-# A specific instruction can only have 6 operands, so we must
-# pass one of the arguments in an x register.
-ii_gc_bif3 Fail Bif Live S1 S2 S3 Dst => \
- move S1 x | i_gc_bif3 Fail Bif Live S2 S3 Dst
+gc_bif2 p Live Bif S1 S2 Dst => i_bif2_body S2 S1 Bif Dst
+gc_bif2 Fail=f Live Bif S1 S2 Dst => i_bif2 S2 S1 Fail Bif Dst
-i_gc_bif3 j? W t? s s d
+gc_bif3 p Live Bif S1 S2 S3 Dst => i_bif3_body S3 S2 S1 Bif Dst
+gc_bif3 Fail=f Live Bif S1 S2 S3 Dst => i_bif3 S3 S2 S1 Fail Bif Dst
#
# The following instruction is specially handled in beam_load.c