From c76b297463feee133adad510128d312536150392 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 16 Jun 2015 10:06:20 +0200 Subject: Eliminate the use of i_fetch in arithmetic instructions The i_fetch instruction fetches two operands and places them in the tmp_arg1 and tmp_arg2 variables. The next instruction (such as i_plus) does not have to handle different types of operands, but can get get them simply from the tmp_arg* variables. Thus, i_fetch was introduced as a way to temper a potentail combinatorial explosion. Unfortunately, clang will generate terrible code because of the tmp_arg1 and tmp_arg2 variables being live across multiple instructions. Note that Clang has no way to predict the control flow from one instruction to another. Clang must assume that any instruction can jump to any other instruction. Somehow GCC manages to cope with this situation much better. Therefore, to improve the quality of the code generated by clang, we must eliminate all uses of the tmp_arg1 and tmp_arg2 variables. This commit eliminates the use of i_fetch in combination with the arithmetic and logical instructions. While we are touching the code for the bsr and bsl instructions, also move the tmp_big[] array from top scope of process main into the block that encloses the bsr and bsl instructions. --- erts/emulator/beam/ops.tab | 93 ++++++++++++++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 33 deletions(-) (limited to 'erts/emulator/beam/ops.tab') diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 335f8e2db5..e4a757ec8b 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1475,68 +1475,95 @@ i_get_map_element f y x x i_get_map_element f x x y i_get_map_element f y x y +# +# Convert the plus operations to a generic plus instruction. +# +gen_plus/5 +gen_minus/5 + +gc_bif1 Fail Live u$bif:erlang:splus/1 Src Dst => \ + gen_plus Fail Live Src i Dst +gc_bif2 Fail Live u$bif:erlang:splus/2 S1 S2 Dst => \ + gen_plus Fail Live S1 S2 Dst + +gc_bif1 Fail Live u$bif:erlang:sminus/1 Src Dst => \ + gen_minus Fail Live i Src Dst +gc_bif2 Fail Live u$bif:erlang:sminus/2 S1 S2 Dst => \ + gen_minus Fail Live S1 S2 Dst + # # Optimize addition and subtraction of small literals using # the i_increment/4 instruction (in bodies, not in guards). # -gc_bif2 p Live u$bif:erlang:splus/2 Int=i Reg=x Dst => \ +gen_plus p Live Int=i Reg=d Dst => \ gen_increment(Reg, Int, Live, Dst) -gc_bif2 p Live u$bif:erlang:splus/2 Reg=x Int=i Dst => \ +gen_plus p Live Reg=d Int=i Dst => \ gen_increment(Reg, Int, Live, Dst) -gc_bif2 p Live u$bif:erlang:sminus/2 Reg=d Int=i Dst | \ - negation_is_small(Int) => \ +gen_minus p Live Reg=d Int=i Dst | negation_is_small(Int) => \ gen_increment_from_minus(Reg, Int, Live, Dst) # # GCing arithmetic instructions. # -gc_bif2 Fail I u$bif:erlang:splus/2 S1=x S2=x Dst=d => i_plus Fail I S1 S2 Dst -gc_bif2 Fail I u$bif:erlang:splus/2 S1 S2 Dst=d => i_fetch S1 S2 | i_plus Fail I Dst -gc_bif2 Fail I u$bif:erlang:sminus/2 S1=x S2=x Dst=d => i_minus Fail I S1 S2 Dst -gc_bif2 Fail I u$bif:erlang:sminus/2 S1 S2 Dst=d => i_fetch S1 S2 | i_minus Fail I Dst -gc_bif2 Fail I u$bif:erlang:stimes/2 S1 S2 Dst=d => i_fetch S1 S2 | i_times Fail I Dst -gc_bif2 Fail I u$bif:erlang:div/2 S1 S2 Dst=d => i_fetch S1 S2 | i_m_div Fail I Dst +gen_plus Fail Live S1 S2 Dst => i_plus Fail Live S1 S2 Dst -gc_bif2 Fail I u$bif:erlang:intdiv/2 S1 S2 Dst=d => i_fetch S1 S2 | i_int_div Fail I Dst -gc_bif2 Fail I u$bif:erlang:rem/2 S1=x S2=x Dst=d => i_rem Fail I S1 S2 Dst -gc_bif2 Fail I u$bif:erlang:rem/2 S1 S2 Dst=d => i_fetch S1 S2 | i_rem Fail I Dst +gen_minus Fail Live S1 S2 Dst => i_minus Fail Live S1 S2 Dst -gc_bif2 Fail I u$bif:erlang:bsl/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bsl Fail I Dst -gc_bif2 Fail I u$bif:erlang:bsr/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bsr Fail I Dst +gc_bif2 Fail Live u$bif:erlang:stimes/2 S1 S2 Dst => \ + i_times Fail Live S1 S2 Dst -gc_bif2 Fail I u$bif:erlang:band/2 S1=x S2=c Dst=d => i_band Fail I S1 S2 Dst -gc_bif2 Fail I u$bif:erlang:band/2 S1 S2 Dst=d => i_fetch S1 S2 | i_band Fail I Dst -gc_bif2 Fail I u$bif:erlang:bor/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bor Fail I Dst -gc_bif2 Fail I u$bif:erlang:bxor/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bxor Fail I Dst +gc_bif2 Fail Live u$bif:erlang:div/2 S1 S2 Dst => \ + i_m_div Fail Live S1 S2 Dst +gc_bif2 Fail Live u$bif:erlang:intdiv/2 S1 S2 Dst => \ + i_int_div Fail Live S1 S2 Dst -gc_bif1 Fail I u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src I Dst +gc_bif2 Fail Live u$bif:erlang:rem/2 S1 S2 Dst => \ + i_rem Fail Live S1 S2 Dst + +gc_bif2 Fail Live u$bif:erlang:bsl/2 S1 S2 Dst => \ + i_bsl Fail Live S1 S2 Dst +gc_bif2 Fail Live u$bif:erlang:bsr/2 S1 S2 Dst => \ + i_bsr Fail Live S1 S2 Dst + +gc_bif2 Fail Live u$bif:erlang:band/2 S1 S2 Dst => \ + i_band Fail Live S1 S2 Dst -gc_bif1 Fail I u$bif:erlang:sminus/1 Src Dst=d => i_fetch i Src | i_minus Fail I Dst -gc_bif1 Fail I u$bif:erlang:splus/1 Src Dst=d => i_fetch i Src | i_plus Fail I Dst +gc_bif2 Fail Live u$bif:erlang:bor/2 S1 S2 Dst => \ + i_bor Fail Live S1 S2 Dst + +gc_bif2 Fail Live u$bif:erlang:bxor/2 S1 S2 Dst => \ + i_bxor Fail Live S1 S2 Dst + +gc_bif1 Fail I u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src I Dst i_increment x I I d i_increment y I I d i_plus j I x x d -i_plus j I d +i_plus j I s s d + i_minus j I x x d -i_minus j I d -i_times j I d -i_m_div j I d -i_int_div j I d +i_minus j I s s d + +i_times j I s s d + +i_m_div j I s s d +i_int_div j I s s d + i_rem j I x x d -i_rem j I d +i_rem j I s s d -i_bsl j I d -i_bsr j I d +i_bsl j I s s d +i_bsr j I s s d i_band j I x c d -i_band j I d -i_bor j I d -i_bxor j I d +i_band j I s s d + +i_bor j I s s d +i_bxor j I s s d i_int_bnot j s I d -- cgit v1.2.3