diff options
Diffstat (limited to 'erts/emulator/beam/ops.tab')
-rw-r--r-- | erts/emulator/beam/ops.tab | 687 |
1 files changed, 442 insertions, 245 deletions
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index e76d896ffc..10ca74cd60 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -74,23 +74,19 @@ trace_jump W return +# To ensure that a "move Src x(0)" instruction can be combined with +# the following call instruction, we need to make sure that there is +# no line/1 instruction between the move and the call. # -# To ensure that a "move Src x(0)" instruction can be combined -# with the following call instruction, we need to make sure that -# there is no line/1 instruction between the move and the call. -# -# A tail-recursive call to an external function (non-BIF) will -# never be saved on the stack, so there is no reason to keep -# the line instruction. (The compiler did not remove the line -# instruction because it cannot tell the difference between -# BIFs and ordinary Erlang functions.) -# +# A tail-recursive call to an external function (BIF or non-BIF) will +# never be saved on the stack, so there is no reason to keep the line +# instruction. move S X0=x==0 | line Loc | call_ext Ar Func => \ line Loc | move S X0 | call_ext Ar Func -move S X0=x==0 | line Loc | call_ext_last Ar Func=u$is_not_bif D => \ +move S X0=x==0 | line Loc | call_ext_last Ar Func D => \ move S X0 | call_ext_last Ar Func D -move S X0=x==0 | line Loc | call_ext_only Ar Func=u$is_not_bif => \ +move S X0=x==0 | line Loc | call_ext_only Ar Func => \ move S X0 | call_ext_only Ar Func move S X0=x==0 | line Loc | call Ar Func => \ line Loc | move S X0 | call Ar Func @@ -102,15 +98,18 @@ line I allocate t t? allocate_heap t I t? -%cold +# This instruction is used when a BIF is called tail-recursively when +# there is a stack frame. deallocate Q -%hot init y allocate_zero t t? allocate_heap_zero t I t? +move Src=y Dst=x | trim N Remaining => move_trim Src Dst N trim N Remaining => i_trim N + +move_trim y x t i_trim t test_heap I t? @@ -118,11 +117,21 @@ test_heap I t? 
allocate_heap S u==0 R => allocate S R allocate_heap_zero S u==0 R => allocate_zero S R -init2 y y -init3 y y y +init Y1 | init Y2 | init Y3 | succ(Y1,Y2) | succ(Y2,Y3) => init_seq3 Y1 +init_seq3 Y1 | init Y4 | succ3(Y1,Y4) => init_seq4 Y1 +init_seq4 Y1 | init Y5 | succ4(Y1,Y5) => init_seq5 Y1 + +init_seq3 y +init_seq4 y +init_seq5 y + init Y1 | init Y2 | init Y3 => init3 Y1 Y2 Y3 init Y1 | init Y2 => init2 Y1 Y2 +init2 y y +init3 y y y + + # Selecting values select_val S=aiq Fail=f Size=u Rest=* => const_select_val(S, Fail, Size, Rest) @@ -205,14 +214,11 @@ set_tuple_element s S P # Get tuple element -i_get_tuple_element xy P x - -%cold -i_get_tuple_element xy P y -%hot +i_get_tuple_element xy P xy i_get_tuple_element2 x P x -i_get_tuple_element2y x P y y +i_get_tuple_element2_dst x P x x +i_get_tuple_element2_dst x P y y i_get_tuple_element3 x P x @@ -258,12 +264,14 @@ system_limit j # Move instructions. # -move C=cxy x==0 | jump Lbl => move_jump Lbl C +move Src=cxy Dst=xy | jump Lbl => move_jump Lbl Src Dst -move_jump f ncxy +move_jump f cxy xy +move_jump f c r -# Movement to and from the stack is common -# Try to pack as much as we can into one instruction + +# Movement to and from the stack is common. +# Try to pack as much as we can into one instruction. 
# Window move move_window/5 @@ -274,6 +282,9 @@ move_window/6 move X1=x Y1=y | move X2=x Y2=y | move X3=x Y3=y | succ(Y1,Y2) | succ(Y2,Y3) => \ move_window X1 X2 X3 Y1 Y3 +move X1=x Y1=y | move X2=x Y2=y | succ(Y1,Y2) => \ + move_window2 X1 X2 Y1 + move_window X1=x X2=x X3=x Y1=y Y3=y | move X4=x Y4=y | succ(Y3,Y4) => \ move_window X1 X2 X3 X4 Y1 Y4 @@ -283,15 +294,54 @@ move_window X1=x X2=x X3=x X4=x Y1=y Y4=y | move X5=x Y5=y | succ(Y4,Y5) => \ move_window X1=x X2=x X3=x Y1=y Y3=y => move_window3 X1 X2 X3 Y1 move_window X1=x X2=x X3=x X4=x Y1=y Y4=y => move_window4 X1 X2 X3 X4 Y1 +move_window2 x x y move_window3 x x x y move_window4 x x x x y move_window5 x x x x x y +# y -> x + +move_src_window/4 +move_src_window/5 + +move Y1=y X1=x | move Y2=y X2=x | succ(Y1, Y2) => \ + move_src_window Y1 Y2 X1 X2 + +move_src_window Y1 Y2 X1 X2 | move Y3=y X3=x | succ(Y2, Y3) => \ + move_src_window Y1 Y3 X1 X2 X3 +move_src_window Y1 Y2 X1 X2 | move Y3=y X3=x | move Y4=y X4=x | succ(Y3, Y4) => \ + move_src_window2 Y1 X1 X2 | move_src_window Y3 Y4 X3 X4 +move_src_window Y1 Y2 X1 X2 | move Y3=y X3=x => \ + move3 Y1 X1 Y2 X2 Y3 X3 + +move_src_window Y1 Y3 X1 X2 X3 | move Y4=y X4=x | succ(Y3, Y4) => \ + move_src_window4 Y1 X1 X2 X3 X4 + +move_src_window Y1 y X1 X2 => move_src_window2 Y1 X1 X2 +move_src_window Y1 y X1 X2 X3 => move_src_window3 Y1 X1 X2 X3 + +move_src_window2 y x x +move_src_window3 y x x x +move_src_window4 y x x x x + # Swap registers. -move R1=x Tmp=x | move R2=xy R1 | move Tmp R2 => swap_temp R1 R2 Tmp +move R1=xy Tmp=x | move R2=xy R1 | move Tmp R2 => swap_temp R1 R2 Tmp + +# The compiler uses x(1022) when swapping registers. It will definitely +# not be used again. 
+swap_temp R1 R2 Tmp=x==1022 => swap R1 R2 + +swap_temp R1 R2 Tmp | move Src Tmp => swap R1 R2 | move Src Tmp swap_temp R1 R2 Tmp | line Loc | apply Live | is_killed_apply(Tmp, Live) => \ swap R1 R2 | line Loc | apply Live +swap_temp R1 R2 Tmp | line Loc | apply_last Live D | is_killed_apply(Tmp, Live) => \ + swap R1 R2 | line Loc | apply_last Live D + +swap_temp R1 R2 Tmp | line Loc | call_fun Live | is_killed_by_call_fun(Tmp, Live) => \ + swap R1 R2 | line Loc | call_fun Live +swap_temp R1 R2 Tmp | make_fun2 OldIndex=u | is_killed_by_make_fun(Tmp, OldIndex) => \ + swap R1 R2 | make_fun2 OldIndex swap_temp R1 R2 Tmp | line Loc | call Live Addr | is_killed(Tmp, Live) => \ swap R1 R2 | line Loc | call Live Addr @@ -307,84 +357,112 @@ swap_temp R1 R2 Tmp | line Loc | call_ext_only Live Addr | \ swap_temp R1 R2 Tmp | line Loc | call_ext_last Live Addr D | \ is_killed(Tmp, Live) => swap R1 R2 | line Loc | call_ext_last Live Addr D -swap_temp x xy x +swap_temp R1 R2 Tmp | call_ext Live Addr | is_killed(Tmp, Live) => \ + swap R1 R2 | call_ext Live Addr +swap_temp R1 R2 Tmp | call_ext_only Live Addr | is_killed(Tmp, Live) => \ + swap R1 R2 | call_ext_only Live Addr +swap_temp R1 R2 Tmp | call_ext_last Live Addr D | is_killed(Tmp, Live) => \ + swap R1 R2 | call_ext_last Live Addr D + +swap_temp R1 R2 Tmp | move Src Any | line Loc | call Live Addr | \ + is_killed(Tmp, Live) | distinct(Tmp, Src) => \ + swap R1 R2 | move Src Any | line Loc | call Live Addr +swap_temp R1 R2 Tmp | move Src Any | line Loc | call_ext Live Addr | \ + is_killed(Tmp, Live) | distinct(Tmp, Src) => \ + swap R1 R2 | move Src Any | line Loc | call_ext Live Addr +swap_temp R1 R2 Tmp | move Src Any | call_only Live Addr | \ + is_killed(Tmp, Live) | distinct(Tmp, Src) => \ + swap R1 R2 | move Src Any | call_only Live Addr +swap_temp R1 R2 Tmp | move Src Any | line Loc | call_ext_only Live Addr | \ + is_killed(Tmp, Live) | distinct(Tmp, Src) => \ + swap R1 R2 | move Src Any | line Loc | call_ext_only Live 
Addr +swap_temp R1 R2 Tmp | move Src Any | line Loc | call_fun Live | \ + is_killed(Tmp, Live) | distinct(Tmp, Src) => \ + swap R1 R2 | move Src Any | line Loc | call_fun Live + +swap_temp R1 R2 Tmp | line Loc | send | is_killed_by_send(Tmp) => \ + swap R1 R2 | line Loc | send + +# swap_temp/3 with Y register operands are rare. +swap_temp R1 R2=y Tmp => swap R1 R2 | move R2 Tmp +swap_temp R1=y R2 Tmp => swap R1 R2 | move R2 Tmp + +swap R1=x R2=y => swap R2 R1 + +swap_temp x x x + +swap xy x +swap y y + +# move_shift + +move SD=x D=x | move Src=cxy SD=x | distinct(D, Src) => move_shift Src SD D +move SD=y D=x | move Src=x SD=y | distinct(D, Src) => move_shift Src SD D +move SD=y D=x | init SD | => move_shift n SD D +move SD=x D=y | move Src=x SD=x | distinct(D, Src) => move_shift Src SD D +move SD=x==0 D=y | move Src=y SD=x==0 | distinct(D, Src) => move_shift Src SD D + +move_shift cxy x x +move_shift nx y x +move_shift x x y +move_shift y r y + +# move2_par x x x x -swap x xy +move X1=x X2=x | move X3=x X4=x | independent_moves(X1, X2, X3, X4) => \ + move2_par X1 X2 X3 X4 +move2_par x x x x -move Src=x D1=x | move Src=x D2=x => move_dup Src D1 D2 -move Src=x SD=x | move SD=x D=x => move_dup Src SD D -move Src=x D1=x | move Src=x D2=y => move_dup Src D1 D2 -move Src=y SD=x | move SD=x D=y => move_dup Src SD D -move Src=x SD=x | move SD=x D=y => move_dup Src SD D -move Src=y SD=x | move SD=x D=x => move_dup Src SD D +# move2_par x x x y -move SD=x D=x | move Src=xy SD=x => move_shift Src SD D -move SD=y D=x | move Src=x SD=y => move_shift Src SD D -move SD=x D=y | move Src=x SD=x => move_shift Src SD D +move X1=x X2=x | move X3=x Y1=y | independent_moves(X1, X2, X3, Y1) => \ + move2_par X1 X2 X3 Y1 +move X3=x Y1=y | move X1=x X2=x | independent_moves(X3, Y1, X1, X2) => \ + move2_par X1 X2 X3 Y1 +move2_par x x x y -# The transformations above guarantee that the source for -# the second move is not the same as the destination for -# the first move. 
That means that we can do the moves in -# parallel (fetch both values, then store them) which could -# be faster. +# move2_par y x y x -move X1=x Y1=y | move X2=x Y2=y => move2_par X1 Y1 X2 Y2 move Y1=y X1=x | move Y2=y X2=x => move2_par Y1 X1 Y2 X2 +move2_par y x y x -move X1=x X2=x | move X3=x X4=x => move2_par X1 X2 X3 X4 +# move2_par y x x y -move X1=x X2=x | move X3=x Y1=y => move2_par X1 X2 X3 Y1 +move S1=y S2=x | move X1=x Y1=y | independent_moves(S1, S2, X1, Y1) => \ + move2_par S1 S2 X1 Y1 +move X1=x Y1=y | move S1=y S2=x | independent_moves(S1, S2, X1, Y1) => \ + move2_par S1 S2 X1 Y1 +move2_par y x x y -move S1=x S2=x | move X1=x Y1=y => move2_par S1 S2 X1 Y1 +# move2_par y x x x -move S1=y S2=x | move X1=x Y1=y => move2_par S1 S2 X1 Y1 +move Y1=y X1=x | move S1=x D1=x | independent_moves(Y1, X1, S1, D1) => \ + move2_par Y1 X1 S1 D1 +move S1=x D1=x | move Y1=y X1=x | independent_moves(Y1, X1, S1, D1) => \ + move2_par Y1 X1 S1 D1 +move2_par y x x x -move Y1=y X1=x | move S1=x D1=x => move2_par Y1 X1 S1 D1 -move S1=x D1=x | move Y1=y X1=x => move2_par S1 D1 Y1 X1 +# move3 -move2_par X1=x Y1=y X2=x Y2=y | move X3=x Y3=y => move3 X1 Y1 X2 Y2 X3 Y3 move2_par Y1=y X1=x Y2=y X2=x | move Y3=y X3=x => move3 Y1 X1 Y2 X2 Y3 X3 move2_par X1=x X2=x X3=x X4=x | move X5=x X6=x => move3 X1 X2 X3 X4 X5 X6 +move3 y x y x y x +move3 x x x x x x + +# move_x1, move_x2 + move C=aiq X=x==1 => move_x1 C move C=aiq X=x==2 => move_x2 C +move n D=y => init D + move_x1 c move_x2 c -move_shift x x x -move_shift y x x -move_shift x y x -move_shift x x y - -move_dup xy x xy - -move2_par x y x y -move2_par y x y x -move2_par x x x x - -move2_par x x x y - -move2_par y x x y - -move2_par x x y x -move2_par y x x x - -move3 x y x y x y -move3 y x y x y x -move3 x x x x x x - -# The compiler almost never generates a "move Literal y(Y)" instruction, -# so let's cheat if we encounter one. 
-move S=n D=y => init D -move S=c D=y => move S x | move x D - -move x x -move x y -move y x -move c x +move xy xy +move c xy move n x -move y y # The following move instructions using x(0) are frequently used. @@ -478,14 +556,25 @@ is_ge f? c x is_ge f? s s %hot -is_eq f? s s +is_eq Fail=f Const=c Reg=xy => is_eq Fail Reg Const +is_eq Fail=f C1=c C2=c => move C1 x | is_eq Fail x C2 +is_eq f? S s -is_ne f? s s +is_ne Fail=f Const=c Reg=xy => is_ne Fail Reg Const +is_ne Fail=f C1=c C2=c => move C1 x | is_ne Fail x C2 +is_ne f? S s # -# Putting things. +# Putting tuples. +# +# Code compiled with OTP 22 and later uses put_tuple2 to +# construct a tuple. +# +# Code compiled before OTP 22 uses put_tuple + one put instruction +# per element. Translate to put_tuple2. # +i_put_tuple/2 put_tuple Arity Dst => i_put_tuple Dst u i_put_tuple Dst Arity Puts=* | put S1 | put S2 | \ @@ -495,11 +584,13 @@ i_put_tuple Dst Arity Puts=* | put S1 | put S2 | \ i_put_tuple Dst Arity Puts=* | put S => \ tuple_append_put(Arity, Dst, Puts, S) -i_put_tuple/2 +i_put_tuple Dst Arity Puts=* => put_tuple2 Dst Arity Puts -i_put_tuple xy I +put_tuple2 xy I # +# Putting lists. +# # The instruction "put_list Const [] Dst" were generated in rare # circumstances up to and including OTP 18. Starting with OTP 19, # AFAIK, it should never be generated. @@ -510,32 +601,26 @@ put_list Src Dst=x Dst => update_list Src Dst update_list xyc x -put_list x n x -put_list y n x -put_list x x x -put_list y x x +# put_list SrcReg1 SrcReg2 => Dst + +put_list xy xy x -put_list y y x -put_list x y x +# put_list SrcReg [] => Dst -# put_list SrcReg Constant Dst +put_list xy n xy -put_list x c x -put_list x c y +# put_list SrcReg Constant => x -put_list y c x +put_list xy c x -# put_list Constant SrcReg Dst +# put_list Constant SrcReg => Dst -put_list c x x -put_list c y x +put_list c xy x # The following put_list instructions using x(0) are frequently used. 
-put_list r n r -put_list r n x -put_list r x x -put_list r x r +put_list r n rx +put_list r x rx put_list x x r %cold @@ -602,9 +687,18 @@ is_tuple f? rxy test_arity Fail Literal=q Arity => move Literal x | test_arity Fail x Arity test_arity Fail=f c Arity => jump Fail +test_arity Fail Tuple=x Arity | get_tuple_element Tuple Pos Dst=x => \ + test_arity_get_tuple_element Fail Tuple Arity Pos Dst test_arity f? xy A +test_arity_get_tuple_element f? x A P x + +is_tuple NotTupleFail Tuple=x | is_tagged_tuple WrongRecordFail Tuple Arity Atom => \ + is_tagged_tuple_ff NotTupleFail WrongRecordFail Tuple Arity Atom + +is_tagged_tuple_ff f? f? rx A a + get_tuple_element Reg=x P1 D1=x | get_tuple_element Reg=x P2 D2=x | \ get_tuple_element Reg=x P3 D3=x | \ succ(P1, P2) | succ(P2, P3) | \ @@ -613,8 +707,11 @@ get_tuple_element Reg=x P1 D1=x | get_tuple_element Reg=x P2 D2=x | \ get_tuple_element Reg=x P1 D1=x | get_tuple_element Reg=x P2 D2=x | \ succ(P1, P2) | succ(D1, D2) => i_get_tuple_element2 Reg P1 D1 +get_tuple_element Reg=x P1 D1=x | get_tuple_element Reg=x P2 D2=x | \ + succ(P1, P2) | distinct(D1, Reg) => i_get_tuple_element2_dst Reg P1 D1 D2 + get_tuple_element Reg=x P1 D1=y | get_tuple_element Reg=x P2 D2=y | \ - succ(P1, P2) => i_get_tuple_element2y Reg P1 D1 D2 + succ(P1, P2) => i_get_tuple_element2_dst Reg P1 D1 D2 get_tuple_element Reg P Dst => i_get_tuple_element Reg P Dst @@ -638,14 +735,21 @@ is_list f? y is_nonempty_list Fail=f S=x | allocate Need Rs => is_nonempty_list_allocate Fail S Need Rs -is_nonempty_list F=f x==0 | test_heap I1 I2 => is_nonempty_list_test_heap F I1 I2 - is_nonempty_list Fail=f S=x | get_list S D1=x D2=x => \ is_nonempty_list_get_list Fail S D1 D2 +is_nonempty_list Fail=f S=x | get_hd S Dst=x => \ + is_nonempty_list_get_hd Fail S Dst + +is_nonempty_list Fail=f S=x | get_tl S Dst=x => \ + is_nonempty_list_get_tl Fail S Dst + is_nonempty_list_allocate f? rx t t -is_nonempty_list_test_heap f? I t + is_nonempty_list_get_list f? 
rx x x +is_nonempty_list_get_hd f? x x +is_nonempty_list_get_tl f? x x + is_nonempty_list f? xy is_atom f? x @@ -710,11 +814,12 @@ is_boolean Fail=f ac => jump Fail is_boolean f? xy %hot -is_function2 Fail=f Literal=q Arity | literal_is_export(Literal) => -is_function2 Fail=f c Arity => jump Fail -is_function2 Fail=f Fun a => jump Fail +is_function2 Fail=f Fun Arity => gen_is_function2(Fail, Fun, Arity) -is_function2 f? S s +%cold +cold_is_function2 f? x x +%hot +hot_is_function2 f? S t # Allocating & initializing. allocate Need Regs | init Y => allocate_init Need Regs Y @@ -946,10 +1051,9 @@ call_ext_only u==0 u$func:os:perf_counter/0 => \ call_ext u Bif=u$is_bif => call_bif Bif -call_ext_last u Bif=u$is_bif D => call_bif Bif | deallocate_return D +call_ext_last u Bif=u$is_bif D => deallocate D | call_bif_only Bif -call_ext_only Ar=u Bif=u$is_bif => \ - allocate u Ar | call_bif Bif | deallocate_return u +call_ext_only Ar=u Bif=u$is_bif => call_bif_only Bif # # Any remaining calls are calls to Erlang functions, not BIFs. @@ -981,6 +1085,7 @@ i_perf_counter %hot call_bif e +call_bif_only e # # Calls to non-building and guard BIFs. 
@@ -989,14 +1094,18 @@ call_bif e bif0 u$bif:erlang:self/0 Dst=d => self Dst bif0 u$bif:erlang:node/0 Dst=d => node Dst +bif1 Fail=f Bif=u$bif:erlang:hd/1 Src=x Dst=x => is_nonempty_list_get_hd Fail Src Dst +bif1 Fail=f Bif=u$bif:erlang:tl/1 Src=x Dst=x => is_nonempty_list_get_tl Fail Src Dst + bif1 Fail Bif=u$bif:erlang:get/1 Src=s Dst=d => gen_get(Src, Dst) bif2 Jump=j u$bif:erlang:element/2 S1=s S2=xy Dst=d => gen_element(Jump, S1, S2, Dst) -bif1 p Bif S1 Dst => bif1_body Bif S1 Dst +bif1 p Bif S1 Dst => i_bif1_body S1 Bif Dst +bif1 Fail=f Bif S1 Dst => i_bif1 S1 Fail Bif Dst -bif2 p Bif S1 S2 Dst => i_bif2_body Bif S1 S2 Dst -bif2 Fail Bif S1 S2 Dst => i_bif2 Fail Bif S1 S2 Dst +bif2 p Bif S1 S2 Dst => i_bif2_body S2 S1 Bif Dst +bif2 Fail=f Bif S1 S2 Dst => i_bif2 S2 S1 Fail Bif Dst i_get_hash c I d i_get s d @@ -1014,10 +1123,12 @@ i_fast_element xy j? I d i_element xy j? s d -bif1 f? b s d -bif1_body b s d -i_bif2 f? b s s d -i_bif2_body b s s d +i_bif1 s f? b d +i_bif1_body s b d +i_bif2 s s f? b d +i_bif2_body s s b d +i_bif3 s s s f? b d +i_bif3_body s s s b d # # Internal calls. @@ -1062,8 +1173,25 @@ call_fun Arity => i_call_fun Arity i_call_fun t i_call_fun_last t Q + +# +# A fun with an empty environment can be converted to a literal. +# As a further optimization, we try to move the fun to its +# final destination directly. 
+ make_fun2 OldIndex=u => gen_make_fun2(OldIndex) +move_fun/2 +move_fun Fun X0 | move X0 Dst | move Src X0 => move Fun Dst | move Src X0 +move_fun Fun X0 | move A B | move X0 Dst | move Src X0 | \ + independent_moves(Fun, X0, A, B) | distinct(Dst, A) => \ + move Fun Dst | move A B | move Src X0 +move_fun Fun X0 | move X0 Dst | make_fun2 OldIndex | \ + is_killed_by_make_fun(X0, OldIndex)=> \ + move Fun Dst | make_fun2 OldIndex + +move_fun Fun Dst => move Fun Dst + %cold i_make_fun W t %hot @@ -1074,101 +1202,141 @@ is_function Fail=f c => jump Fail func_info M F A => i_func_info u M F A # ================================================================ -# New bit syntax matching (R11B). +# Bit syntax matching obsoleted in OTP 22. # ================================================================ -%warm +%cold bs_start_match2 Fail=f ica X Y D => jump Fail bs_start_match2 Fail Bin X Y D => i_bs_start_match2 Bin Fail X Y D -i_bs_start_match2 xy f t t x +i_bs_start_match2 xy f t t d +bs_save2 Y=y Index => move Y x | bs_save2 x Index bs_save2 Reg Index => gen_bs_save(Reg, Index) i_bs_save2 x t +bs_restore2 Y=y Index => move Y x | bs_restore2 x Index bs_restore2 Reg Index => gen_bs_restore(Reg, Index) i_bs_restore2 x t +bs_context_to_binary Y=y | line L | badmatch Y => \ + move Y x | bs_context_to_binary x | line L | badmatch x +bs_context_to_binary Y=y => move Y x | bs_context_to_binary x +bs_context_to_binary x +%warm + +# ================================================================ +# New bit syntax matching (R11B). +# ================================================================ + +%warm + # Matching integers bs_match_string Fail Ms Bits Val => i_bs_match_string Ms Fail Bits Val -i_bs_match_string x f W W +i_bs_match_string xy f W W # Fetching integers from binaries. 
-bs_get_integer2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \ +bs_get_integer2 Fail=f Ms=xy Live=u Sz=sq Unit=u Flags=u Dst=d => \ gen_get_integer2(Fail, Ms, Live, Sz, Unit, Flags, Dst) -i_bs_get_integer_small_imm x W f? t x -i_bs_get_integer_imm x W t f? t x -i_bs_get_integer f? t t x s x -i_bs_get_integer_8 x f? x -i_bs_get_integer_16 x f? x +i_bs_get_integer_small_imm Ms Bits Fail Flags Y=y => \ + i_bs_get_integer_small_imm Ms Bits Fail Flags x | move x Y + +i_bs_get_integer_imm Ms Bits Live Fail Flags Y=y => \ + i_bs_get_integer_imm Ms Bits Live Fail Flags x | move x Y + +i_bs_get_integer_small_imm xy W f? t x +i_bs_get_integer_imm xy W t f? t x +i_bs_get_integer xy f? t t s d +i_bs_get_integer_8 xy f? d +i_bs_get_integer_16 xy f? d %if ARCH_64 -i_bs_get_integer_32 x f? x +i_bs_get_integer_32 xy f? d %endif # Fetching binaries from binaries. -bs_get_binary2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \ +bs_get_binary2 Fail=f Ms=xy Live=u Sz=sq Unit=u Flags=u Dst=d => \ gen_get_binary2(Fail, Ms, Live, Sz, Unit, Flags, Dst) -i_bs_get_binary_imm2 f? x t W t x -i_bs_get_binary2 f x t? s t x -i_bs_get_binary_all2 f? x t t x -i_bs_get_binary_all_reuse x f? t +i_bs_get_binary_imm2 xy f? t W t d +i_bs_get_binary2 xy f t? s t d +i_bs_get_binary_all2 xy f? t t d # Fetching float from binaries. -bs_get_float2 Fail=f Ms=x Live=u Sz=s Unit=u Flags=u Dst=d => \ +bs_get_float2 Fail=f Ms=xy Live=u Sz=s Unit=u Flags=u Dst=d => \ gen_get_float2(Fail, Ms, Live, Sz, Unit, Flags, Dst) bs_get_float2 Fail=f Ms=x Live=u Sz=q Unit=u Flags=u Dst=d => jump Fail -i_bs_get_float2 f? x t s t x +i_bs_get_float2 xy f? t s t d # Miscellanous -bs_skip_bits2 Fail=f Ms=x Sz=sq Unit=u Flags=u => \ +bs_skip_bits2 Fail=f Ms=xy Sz=sq Unit=u Flags=u => \ gen_skip_bits2(Fail, Ms, Sz, Unit, Flags) -i_bs_skip_bits_imm2 f? x W -i_bs_skip_bits2 f? x xy t -i_bs_skip_bits_all2 f? x t +i_bs_skip_bits_imm2 f? xy W +i_bs_skip_bits2 xy xy f? 
t -bs_test_tail2 Fail=f Ms=x Bits=u==0 => bs_test_zero_tail2 Fail Ms -bs_test_tail2 Fail=f Ms=x Bits=u => bs_test_tail_imm2 Fail Ms Bits -bs_test_zero_tail2 f? x -bs_test_tail_imm2 f? x W +bs_test_tail2 Fail=f Ms=xy Bits=u==0 => bs_test_zero_tail2 Fail Ms +bs_test_tail2 Fail=f Ms=xy Bits=u => bs_test_tail_imm2 Fail Ms Bits +bs_test_zero_tail2 f? xy +bs_test_tail_imm2 f? xy W bs_test_unit F Ms Unit=u==8 => bs_test_unit8 F Ms -bs_test_unit f? x t -bs_test_unit8 f? x +bs_test_unit f? xy t +bs_test_unit8 f? xy -# An y register operand for bs_context_to_binary is rare, -# but can happen because of inlining. +# Gets a bitstring from the tail of a context. +bs_get_tail xy d t -bs_context_to_binary Y=y | line L | badmatch Y => \ - move Y x | bs_context_to_binary x | line L | badmatch x +# New bs_start_match variant for contexts with external position storage. +# +# bs_get/set_position is used to save positions into registers instead of +# "slots" in the context itself, which lets us continue matching even after +# we've passed it off to another function. -bs_context_to_binary Y=y => move Y x | bs_context_to_binary x +%if ARCH_64 +bs_start_match3 Fail Bin Live Ctx | bs_get_position Ctx Pos=x Ignored => \ + i_bs_start_match3_gp Bin Live Fail Ctx Pos +i_bs_start_match3_gp xy t f d x +%endif -bs_context_to_binary x +bs_start_match3 Fail=f ica Live Dst => jump Fail +bs_start_match3 Fail Bin Live Dst => i_bs_start_match3 Bin Live Fail Dst + +i_bs_start_match3 xy t f d + +# Match context position instructions. 64-bit assumes that all positions can +# fit into an unsigned small. + +%if ARCH_64 + bs_get_position Src Dst Live => i_bs_get_position Src Dst + i_bs_get_position xy xy + bs_set_position xy xy +%else + bs_get_position xy d t? + bs_set_position xy xy +%endif # # Utf8/utf16/utf32 support. (R12B-5) # -bs_get_utf8 Fail=f Ms=x u u Dst=d => i_bs_get_utf8 Ms Fail Dst -i_bs_get_utf8 x f? x +bs_get_utf8 Fail=f Ms=xy u u Dst=d => i_bs_get_utf8 Ms Fail Dst +i_bs_get_utf8 xy f? 
d -bs_skip_utf8 Fail=f Ms=x u u => i_bs_get_utf8 Ms Fail x +bs_skip_utf8 Fail=f Ms=xy u u => i_bs_get_utf8 Ms Fail x -bs_get_utf16 Fail=f Ms=x u Flags=u Dst=d => i_bs_get_utf16 Ms Fail Flags Dst -bs_skip_utf16 Fail=f Ms=x u Flags=u => i_bs_get_utf16 Ms Fail Flags x +bs_get_utf16 Fail=f Ms=xy u Flags=u Dst=d => i_bs_get_utf16 Ms Fail Flags Dst +bs_skip_utf16 Fail=f Ms=xy u Flags=u => i_bs_get_utf16 Ms Fail Flags x -i_bs_get_utf16 x f? t x +i_bs_get_utf16 xy f? t d -bs_get_utf32 Fail=f Ms=x Live=u Flags=u Dst=d => \ +bs_get_utf32 Fail=f Ms=xy Live=u Flags=u Dst=d => \ bs_get_integer2 Fail Ms Live i=32 u=1 Flags Dst | \ i_bs_validate_unicode_retract Fail Dst Ms -bs_skip_utf32 Fail=f Ms=x Live=u Flags=u => \ +bs_skip_utf32 Fail=f Ms=xy Live=u Flags=u => \ bs_get_integer2 Fail Ms Live i=32 u=1 Flags x | \ i_bs_validate_unicode_retract Fail x Ms @@ -1182,6 +1350,9 @@ i_bs_validate_unicode_retract j s S bs_init2 Fail Sz Words Regs Flags Dst | binary_too_big(Sz) => system_limit Fail +bs_init2 Fail Sz Words Regs Flags Dst=y => \ + bs_init2 Fail Sz Words Regs Flags x | move x Dst + bs_init2 Fail Sz=u Words=u==0 Regs Flags Dst => i_bs_init Sz Regs Dst bs_init2 Fail Sz=u Words Regs Flags Dst => \ @@ -1202,6 +1373,8 @@ i_bs_init_heap W I t? x bs_init_bits Fail Sz=o Words Regs Flags Dst => system_limit Fail +bs_init_bits Fail Sz Words Regs Flags Dst=y => \ + bs_init_bits Fail Sz Words Regs Flags x | move x Dst bs_init_bits Fail Sz=u Words=u==0 Regs Flags Dst => i_bs_init_bits Sz Regs Dst bs_init_bits Fail Sz=u Words Regs Flags Dst => i_bs_init_bits_heap Sz Words Regs Dst @@ -1230,7 +1403,7 @@ bs_private_append Fail Size Unit Bin Flags Dst => \ bs_init_writable -i_bs_append j? I t? t s x +i_bs_append j? I t? t s xy i_bs_private_append j? t s S x # @@ -1240,31 +1413,35 @@ i_bs_private_append j? t s S x bs_put_integer Fail=j Sz=sq Unit=u Flags=u Src=s => \ gen_put_integer(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_integer j? s t s -i_new_bs_put_integer_imm j? 
W t s +i_new_bs_put_integer j? S t s +i_new_bs_put_integer_imm xyc j? W t # # Utf8/utf16/utf32 support. (R12B-5) # -bs_utf8_size j Src=s Dst=d => i_bs_utf8_size Src Dst +bs_utf8_size j Src Dst=d => i_bs_utf8_size Src Dst +bs_utf16_size j Src Dst=d => i_bs_utf16_size Src Dst -i_bs_utf8_size s x +bs_put_utf8 Fail u Src => i_bs_put_utf8 Fail Src -bs_utf16_size j Src=s Dst=d => i_bs_utf16_size Src Dst - -i_bs_utf16_size s x - -bs_put_utf8 Fail u Src=s => i_bs_put_utf8 Fail Src +bs_put_utf32 Fail=j Flags=u Src=s => \ + i_bs_validate_unicode Fail Src | bs_put_integer Fail i=32 u=1 Flags Src -i_bs_put_utf8 j? s +i_bs_utf8_size S x +i_bs_utf16_size S x -bs_put_utf16 j? t s +i_bs_put_utf8 j? S +bs_put_utf16 j? t S -bs_put_utf32 Fail=j Flags=u Src=s => \ - i_bs_validate_unicode Fail Src | bs_put_integer Fail i=32 u=1 Flags Src +i_bs_validate_unicode j? S -i_bs_validate_unicode j? s +# Handle unoptimized code. +i_bs_utf8_size Src=c Dst => move Src x | i_bs_utf8_size x Dst +i_bs_utf16_size Src=c Dst => move Src x | i_bs_utf16_size x Dst +i_bs_put_utf8 Fail Src=c => move Src x | i_bs_put_utf8 Fail x +bs_put_utf16 Fail Flags Src=c => move Src x | bs_put_utf16 Fail Flags x +i_bs_validate_unicode Fail Src=c => move Src x | i_bs_validate_unicode Fail x # # Storing floats into binaries. @@ -1274,7 +1451,7 @@ bs_put_float Fail Sz=q Unit Flags Val => badarg Fail bs_put_float Fail=j Sz=s Unit=u Flags=u Src=s => \ gen_put_float(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_float j? s t s +i_new_bs_put_float j? S t s i_new_bs_put_float_imm j? W t s # @@ -1284,9 +1461,18 @@ i_new_bs_put_float_imm j? W t s bs_put_binary Fail=j Sz=s Unit=u Flags=u Src=s => \ gen_put_binary(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_binary j? s t s -i_new_bs_put_binary_imm j? W s -i_new_bs_put_binary_all j? s t +# In unoptimized code, the binary argument could be a literal. (In optimized code, +# there would be a bs_put_string instruction.) 
+i_new_bs_put_binary Fail Size Unit Lit=c => \ + move Lit x | i_new_bs_put_binary Fail Size Unit x +i_new_bs_put_binary_imm Fail Size Lit=c => \ + move Lit x | i_new_bs_put_binary_imm Fail Size x +i_new_bs_put_binary_all Lit=c Fail Unit => \ + move Lit x | i_new_bs_put_binary_all x Fail Unit + +i_new_bs_put_binary j? S t S +i_new_bs_put_binary_imm j? W S +i_new_bs_put_binary_all xy j? t # # Warning: The i_bs_put_string and i_new_bs_put_string instructions @@ -1384,23 +1570,22 @@ put_map_exact F Map Dst Live Size Rest=* | map_key_sort(Size, Rest) => \ sorted_put_map_assoc Map Dst Live Size Rest=* | is_empty_map(Map) => \ new_map Dst Live Size Rest -sorted_put_map_assoc Src=s Dst Live Size Rest=* => \ - update_map_assoc Src Dst Live Size Rest -sorted_put_map_assoc Src Dst Live Size Rest=* => \ - move Src x | update_map_assoc x Dst Live Size Rest +sorted_put_map_assoc Src=xyc Dst Live Size Rest=* => \ + update_map_assoc Src Dst Live Size Rest -sorted_put_map_exact F Src=s Dst Live Size Rest=* => \ - update_map_exact F Src Dst Live Size Rest -sorted_put_map_exact F Src Dst Live Size Rest=* => \ - move Src x | update_map_exact F x Dst Live Size Rest +sorted_put_map_exact Fail Src=xy Dst Live Size Rest=* => \ + update_map_exact Src Fail Dst Live Size Rest +# Literal map arguments for an exact update operation are extremely rare. +sorted_put_map_exact Fail Src Dst Live Size Rest=* => \ + move Src x | update_map_exact x Fail Dst Live Size Rest new_map Dst Live Size Rest=* | is_small_map_literal_keys(Size, Rest) => \ gen_new_small_map_lit(Dst, Live, Size, Rest) new_map d t I i_new_small_map_lit d t q -update_map_assoc s d t I -update_map_exact j? s d t I +update_map_assoc xyc d t I +update_map_exact xy j? 
d t I is_map Fail Lit=q | literal_is_map(Lit) => is_map Fail cq => jump Fail @@ -1447,80 +1632,93 @@ gc_bif2 Fail Live u$bif:erlang:sminus/2 S1 S2 Dst => \ # # Optimize addition and subtraction of small literals using -# the i_increment/4 instruction (in bodies, not in guards). +# the i_increment/3 instruction (in bodies, not in guards). # gen_plus p Live Int=i Reg=d Dst => \ - gen_increment(Reg, Int, Live, Dst) + gen_increment(Reg, Int, Dst) gen_plus p Live Reg=d Int=i Dst => \ - gen_increment(Reg, Int, Live, Dst) + gen_increment(Reg, Int, Dst) gen_minus p Live Reg=d Int=i Dst | negation_is_small(Int) => \ - gen_increment_from_minus(Reg, Int, Live, Dst) + gen_increment_from_minus(Reg, Int, Dst) # -# GCing arithmetic instructions. +# Arithmetic instructions. # -gen_plus Fail Live S1 S2 Dst => i_plus S1 S2 Fail Live Dst +# It is OK to swap arguments for '+' in a guard. It is also +# OK to turn minus into plus in a guard. +gen_plus Fail=f Live S1=c S2 Dst => i_plus S2 S1 Fail Dst +gen_minus Fail=f Live S1 S2=i Dst => gen_plus_from_minus(Fail, Live, S1, S2, Dst) + +gen_plus Fail Live S1 S2 Dst => i_plus S1 S2 Fail Dst -gen_minus Fail Live S1 S2 Dst => i_minus S1 S2 Fail Live Dst +gen_minus Fail Live S1 S2 Dst => i_minus S1 S2 Fail Dst gc_bif2 Fail Live u$bif:erlang:stimes/2 S1 S2 Dst => \ - i_times Fail Live S1 S2 Dst + i_times Fail S1 S2 Dst gc_bif2 Fail Live u$bif:erlang:div/2 S1 S2 Dst => \ - i_m_div Fail Live S1 S2 Dst + i_m_div Fail S1 S2 Dst gc_bif2 Fail Live u$bif:erlang:intdiv/2 S1 S2 Dst => \ - i_int_div Fail Live S1 S2 Dst + i_int_div Fail S1 S2 Dst gc_bif2 Fail Live u$bif:erlang:rem/2 S1 S2 Dst => \ - i_rem S1 S2 Fail Live Dst + i_rem S1 S2 Fail Dst gc_bif2 Fail Live u$bif:erlang:bsl/2 S1 S2 Dst => \ - i_bsl S1 S2 Fail Live Dst + i_bsl S1 S2 Fail Dst gc_bif2 Fail Live u$bif:erlang:bsr/2 S1 S2 Dst => \ - i_bsr S1 S2 Fail Live Dst + i_bsr S1 S2 Fail Dst gc_bif2 Fail Live u$bif:erlang:band/2 S1 S2 Dst => \ - i_band S1 S2 Fail Live Dst + i_band S1 S2 Fail Dst 
gc_bif2 Fail Live u$bif:erlang:bor/2 S1 S2 Dst => \ - i_bor Fail Live S1 S2 Dst + i_bor Fail S1 S2 Dst gc_bif2 Fail Live u$bif:erlang:bxor/2 S1 S2 Dst => \ - i_bxor Fail Live S1 S2 Dst + i_bxor Fail S1 S2 Dst -gc_bif1 Fail I u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src I Dst +gc_bif1 Fail Live u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src Dst -i_increment rxy W t d +i_increment rxy W d -i_plus x xy j? t d -i_plus s s j? t d +# Handle unoptimized code. +i_plus S1=c S2=c Fail Dst => move S1 x | i_plus x S2 Fail Dst -i_minus x x j? t d -i_minus s s j? t d +i_plus xy xyc j? d -i_times j? t s s d +# A minus instruction with a constant right operand will be +# converted to an i_increment instruction, except in guards or +# when the negated value of the constant won't fit in a guard. +# Therefore, it is very rare. +i_minus S1 S2=c Fail Dst => move S2 x | i_minus S1 x Fail Dst -i_m_div j? t s s d -i_int_div j? t s s d +i_minus xy xy j? d +i_minus c xy j? d -i_rem x x j? t d -i_rem s s j? t d +i_times j? s s d -i_bsl s s j? t d -i_bsr s s j? t d +i_m_div j? s s d +i_int_div j? s s d -i_band x c j? t d -i_band s s j? t d +i_rem x x j? d +i_rem s s j? d -i_bor j? I s s d -i_bxor j? I s s d +i_bsl s s j? d +i_bsr s s j? d -i_int_bnot Fail Src=c Live Dst => move Src x | i_int_bnot Fail x Live Dst +i_band x c j? d +i_band s s j? d -i_int_bnot j? S t d +i_bor j? s s d +i_bxor j? s s d + +i_int_bnot Fail Src=c Dst => move Src x | i_int_bnot Fail x Dst + +i_int_bnot j? S d # # Old guard BIFs that creates heap fragments are no longer allowed. @@ -1533,29 +1731,28 @@ bif1 Fail u$bif:erlang:round/1 s d => too_old_compiler bif1 Fail u$bif:erlang:trunc/1 s d => too_old_compiler # -# Guard BIFs. +# Handle the length/1 guard BIF specially to make it trappable. 
# -gc_bif1 Fail I Bif Src Dst => \ - gen_guard_bif1(Fail, I, Bif, Src, Dst) - -gc_bif2 Fail I Bif S1 S2 Dst => \ - gen_guard_bif2(Fail, I, Bif, S1, S2, Dst) -gc_bif3 Fail I Bif S1 S2 S3 Dst => \ - gen_guard_bif3(Fail, I, Bif, S1, S2, S3, Dst) +gc_bif1 Fail=j Live u$bif:erlang:length/1 Src Dst => \ + i_length_setup Live Src | i_length Fail Live Dst -i_gc_bif1 j? W s t? d +i_length_setup Live Src=c => move Src x | i_length_setup Live x -i_gc_bif2 j? W t? s s d +i_length_setup t xy +i_length j? t d -ii_gc_bif3/7 +# +# Guard BIFs. +# +gc_bif1 p Live Bif Src Dst => i_bif1_body Src Bif Dst +gc_bif1 Fail=f Live Bif Src Dst => i_bif1 Src Fail Bif Dst -# A specific instruction can only have 6 operands, so we must -# pass one of the arguments in an x register. -ii_gc_bif3 Fail Bif Live S1 S2 S3 Dst => \ - move S1 x | i_gc_bif3 Fail Bif Live S2 S3 Dst +gc_bif2 p Live Bif S1 S2 Dst => i_bif2_body S2 S1 Bif Dst +gc_bif2 Fail=f Live Bif S1 S2 Dst => i_bif2 S2 S1 Fail Bif Dst -i_gc_bif3 j? W t? s s d +gc_bif3 p Live Bif S1 S2 S3 Dst => i_bif3_body S3 S2 S1 Bif Dst +gc_bif3 Fail=f Live Bif S1 S2 S3 Dst => i_bif3 S3 S2 S1 Fail Bif Dst # # The following instruction is specially handled in beam_load.c |