From c1aa239e6ff52f699dd2fec761073a5effa93808 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 6 Mar 2019 06:49:55 +0100 Subject: beam_emu.c: Rename the confusing macro GetR() to GetSource() --- erts/emulator/beam/beam_emu.c | 14 +++++++------- erts/emulator/utils/beam_makeops | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 90162a6543..04a2a83123 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -322,19 +322,19 @@ void** beam_ops; #define Arg(N) I[(N)+1] -#define GetR(pos, tr) \ +#define GetSource(raw, dst) \ do { \ - tr = Arg(pos); \ - switch (loader_tag(tr)) { \ + dst = raw; \ + switch (loader_tag(dst)) { \ case LOADER_X_REG: \ - tr = x(loader_x_reg_index(tr)); \ + dst = x(loader_x_reg_index(dst)); \ break; \ case LOADER_Y_REG: \ - ASSERT(loader_y_reg_index(tr) >= 1); \ - tr = y(loader_y_reg_index(tr)); \ + ASSERT(loader_y_reg_index(dst) >= 1); \ + dst = y(loader_y_reg_index(dst)); \ break; \ } \ - CHECK_TERM(tr); \ + CHECK_TERM(dst); \ } while (0) #define PUT_TERM_REG(term, desc) \ diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index f73e2362bf..1625b2cc65 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -1488,7 +1488,7 @@ sub code_gen { $var_decls .= "Eterm $tmp;\n"; $tmp_arg_num++; push(@f, $tmp); - $prefix .= "GetR($arg_offset, $tmp);\n"; + $prefix .= "GetSource(" . arg_offset($arg_offset) . 
", $tmp);\n"; $need_block = 1; last SWITCH; }; -- cgit v1.2.3 From 131bb83e7ba123d35ea8201832ce2165fc447b10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 6 Mar 2019 15:41:45 +0100 Subject: bif_instrs.tab: Don't hardcode length of instructions --- erts/emulator/beam/bif_instrs.tab | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/bif_instrs.tab b/erts/emulator/beam/bif_instrs.tab index 418bbe2b23..8499f61114 100644 --- a/erts/emulator/beam/bif_instrs.tab +++ b/erts/emulator/beam/bif_instrs.tab @@ -269,7 +269,7 @@ call_bif(Exp) { CHECK_TERM(r(0)); $NEXT0(); } else if (c_p->freason == TRAP) { - SET_CP(c_p, I+2); + SET_CP(c_p, $NEXT_INSTRUCTION); SET_I(c_p->i); SWAPIN; Dispatch(); @@ -313,7 +313,7 @@ send() { r(0) = result; CHECK_TERM(r(0)); } else if (c_p->freason == TRAP) { - SET_CP(c_p, I+1); + SET_CP(c_p, $NEXT_INSTRUCTION); SET_I(c_p->i); SWAPIN; Dispatch(); -- cgit v1.2.3 From 85b3b9d1930b0addad09c56576608c5133a4976f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sat, 2 Mar 2019 14:05:12 +0100 Subject: Optimize hd/1 and tl/1 in guards --- erts/emulator/beam/ops.tab | 3 +++ 1 file changed, 3 insertions(+) (limited to 'erts') diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 3cfc685336..9237fa1069 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1035,6 +1035,9 @@ call_bif e bif0 u$bif:erlang:self/0 Dst=d => self Dst bif0 u$bif:erlang:node/0 Dst=d => node Dst +bif1 Fail=f Bif=u$bif:erlang:hd/1 Src=x Dst=x => is_nonempty_list_get_hd Fail Src Dst +bif1 Fail=f Bif=u$bif:erlang:tl/1 Src=x Dst=x => is_nonempty_list_get_tl Fail Src Dst + bif1 Fail Bif=u$bif:erlang:get/1 Src=s Dst=d => gen_get(Src, Dst) bif2 Jump=j u$bif:erlang:element/2 S1=s S2=xy Dst=d => gen_element(Jump, S1, S2, Dst) -- cgit v1.2.3 From ca68fe8277c5f534d32c8d5bf0e5ba66ebf124e0 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sun, 3 Mar 2019 07:46:38 +0100 Subject: Introduce move_window2 and remove move2_par_xyxy --- erts/emulator/beam/instrs.tab | 9 +++++++++ erts/emulator/beam/ops.tab | 9 ++++----- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab index fc88cab22f..5b81517359 100644 --- a/erts/emulator/beam/instrs.tab +++ b/erts/emulator/beam/instrs.tab @@ -512,6 +512,15 @@ move_shift(Src, SD, D) { $SD = V; } +move_window2(S1, S2, D) { + Eterm xt0, xt1; + Eterm* y = &$D; + xt0 = $S1; + xt1 = $S2; + y[0] = xt0; + y[1] = xt1; +} + move_window3(S1, S2, S3, D) { Eterm xt0, xt1, xt2; Eterm* y = &$D; diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 9237fa1069..448412c903 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -285,6 +285,9 @@ move_window/6 move X1=x Y1=y | move X2=x Y2=y | move X3=x Y3=y | succ(Y1,Y2) | succ(Y2,Y3) => \ move_window X1 X2 X3 Y1 Y3 +move X1=x Y1=y | move X2=x Y2=y | succ(Y1,Y2) => \ + move_window2 X1 X2 Y1 + move_window X1=x X2=x X3=x Y1=y Y3=y | move X4=x Y4=y | succ(Y3,Y4) => \ move_window X1 X2 X3 X4 Y1 Y4 @@ -294,6 +297,7 @@ move_window X1=x X2=x X3=x X4=x Y1=y Y4=y | move X5=x Y5=y | succ(Y4,Y5) => \ move_window X1=x X2=x X3=x Y1=y Y3=y => move_window3 X1 X2 X3 Y1 move_window X1=x X2=x X3=x X4=x Y1=y Y4=y => move_window4 X1 X2 X3 X4 Y1 +move_window2 x x y move_window3 x x x y move_window4 x x x x y move_window5 x x x x x y @@ -346,11 +350,6 @@ move X1=x X2=x | move X3=x X4=x | independent_moves(X1, X2, X3, X4) => \ move2_par X1 X2 X3 X4 move2_par x x x x -# move2_par x y x y - -move X1=x Y1=y | move X2=x Y2=y => move2_par X1 Y1 X2 Y2 -move2_par x y x y - # move2_par x x x y move X1=x X2=x | move X3=x Y1=y | independent_moves(X1, X2, X3, Y1) => \ -- cgit v1.2.3 From 90bf10302a792865cdb0f741e23ca130725e7461 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= 
Date: Sun, 3 Mar 2019 08:13:47 +0100 Subject: Remove optimization that has become a pessimization The compiler used to generate "move Literal y(Y)" instructions very rarely. Therefore, there was a transformation to avoid having a "move c y" instruction. With the new compiler, "move Literal y(Y)" instructions are relatively frequent, so we will need a "move c y" instruction. --- erts/emulator/beam/ops.tab | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 448412c903..81f598d266 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -392,20 +392,14 @@ move3 x x x x x x move C=aiq X=x==1 => move_x1 C move C=aiq X=x==2 => move_x2 C +move n D=y => init D + move_x1 c move_x2 c -# The compiler almost never generates a "move Literal y(Y)" instruction, -# so let's cheat if we encounter one. -move S=n D=y => init D -move S=c D=y => move S x | move x D - -move x x -move x y -move y x -move c x +move xy xy +move c xy move n x -move y y # The following move instructions using x(0) are frequently used. -- cgit v1.2.3 From 6035ea12396bcd1468e83a3b892180e70fd24df8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sun, 3 Mar 2019 08:34:01 +0100 Subject: Reclassify get_tuple_element with a Y destination as hot get_tuple_element with an Y register has become more frequent with the new compiler. 
--- erts/emulator/beam/ops.tab | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 81f598d266..79c5afd238 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -215,11 +215,7 @@ set_tuple_element s S P # Get tuple element -i_get_tuple_element xy P x - -%cold -i_get_tuple_element xy P y -%hot +i_get_tuple_element xy P xy i_get_tuple_element2 x P x i_get_tuple_element2_dst x P x x -- cgit v1.2.3 From 0a65a7b517ced326b0b0754497a2285821df60f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Sun, 3 Mar 2019 15:58:36 +0100 Subject: Deoptimize obsoleted binary matching instructions Mark the obsoleted instructions bs_start_match2, bs_save2, bs_restore2, and bs_context_to_binary as cold. Remove support of a Y operand for bs_save2 and bs_restore2. --- erts/emulator/beam/ops.tab | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 79c5afd238..969d2728a5 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1115,19 +1115,33 @@ is_function Fail=f c => jump Fail func_info M F A => i_func_info u M F A # ================================================================ -# New bit syntax matching (R11B). +# Bit syntax matching obsoleted in OTP 22. 
# ================================================================ -%warm +%cold bs_start_match2 Fail=f ica X Y D => jump Fail bs_start_match2 Fail Bin X Y D => i_bs_start_match2 Bin Fail X Y D i_bs_start_match2 xy f t t d +bs_save2 Y=y Index => move Y x | bs_save2 x Index bs_save2 Reg Index => gen_bs_save(Reg, Index) -i_bs_save2 xy t +i_bs_save2 x t +bs_restore2 Y=y Index => move Y x | bs_restore2 x Index bs_restore2 Reg Index => gen_bs_restore(Reg, Index) -i_bs_restore2 xy t +i_bs_restore2 x t + +bs_context_to_binary Y=y | line L | badmatch Y => \ + move Y x | bs_context_to_binary x | line L | badmatch x +bs_context_to_binary Y=y => move Y x | bs_context_to_binary x +bs_context_to_binary x +%warm + +# ================================================================ +# New bit syntax matching (R11B). +# ================================================================ + +%warm # Matching integers bs_match_string Fail Ms Bits Val => i_bs_match_string Ms Fail Bits Val @@ -1189,16 +1203,6 @@ bs_test_unit F Ms Unit=u==8 => bs_test_unit8 F Ms bs_test_unit f? xy t bs_test_unit8 f? xy -# An y register operand for bs_context_to_binary is rare, -# but can happen because of inlining. - -bs_context_to_binary Y=y | line L | badmatch Y => \ - move Y x | bs_context_to_binary x | line L | badmatch x - -bs_context_to_binary Y=y => move Y x | bs_context_to_binary x - -bs_context_to_binary x - # Gets a bitstring from the tail of a context. bs_get_tail xy d t -- cgit v1.2.3 From 82431098ec653fc98ab5a9114609f6c8a5a646e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 4 Mar 2019 06:43:06 +0100 Subject: Reduce code size for binary matching instructions The new compiler required adding support for Y register for all binary matching instructions. That was (intentionally) done in a naive way that simply duplicated the entire body of each instruction. Now it's time to be less naive. Rewrite the binary matching instructions using micro instructions. 
Because some of the binary instructions are huge, that will significantly decrease the size of process_main(). When compiling with clang, a huge process_main() would mess up profile-guided optimization resulting in a significant performance degradation. On my Mac, profile-guided optimization would decrease the estone benchmark by 100K estones (about 20 percent). This commit gives me back the lost estones. --- erts/emulator/beam/beam_load.c | 36 +++--- erts/emulator/beam/bs_instrs.tab | 230 +++++++++++++++++++++++++++++++-------- erts/emulator/beam/ops.tab | 12 +- 3 files changed, 210 insertions(+), 68 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index f4eeb54a1b..5aaba78de4 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -3295,11 +3295,11 @@ gen_get_integer2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live, } else { op->op = genop_i_bs_get_integer_6; op->arity = 6; - op->a[0] = Fail; - op->a[1] = Live; - op->a[2].type = TAG_u; - op->a[2].val = (Unit.val << 3) | Flags.val; - op->a[3] = Ms; + op->a[0] = Ms; + op->a[1] = Fail; + op->a[2] = Live; + op->a[3].type = TAG_u; + op->a[3].val = (Unit.val << 3) | Flags.val; op->a[4] = Size; op->a[5] = Dst; op->next = NULL; @@ -3332,8 +3332,8 @@ gen_get_binary2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live, } else { op->op = genop_i_bs_get_binary_all2_5; op->arity = 5; - op->a[0] = Fail; - op->a[1] = Ms; + op->a[0] = Ms; + op->a[1] = Fail; op->a[2] = Live; op->a[3] = Unit; op->a[4] = Dst; @@ -3341,8 +3341,8 @@ gen_get_binary2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live, } else if (Size.type == TAG_i) { op->op = genop_i_bs_get_binary_imm2_6; op->arity = 6; - op->a[0] = Fail; - op->a[1] = Ms; + op->a[0] = Ms; + op->a[1] = Fail; op->a[2] = Live; op->a[3].type = TAG_u; if (!safe_mul(Size.val, Unit.val, &op->a[3].val)) { @@ -3362,8 +3362,8 @@ gen_get_binary2(LoaderState* stp, GenOpArg Fail, 
GenOpArg Ms, GenOpArg Live, } else { op->op = genop_i_bs_get_binary_imm2_6; op->arity = 6; - op->a[0] = Fail; - op->a[1] = Ms; + op->a[0] = Ms; + op->a[1] = Fail; op->a[2] = Live; op->a[3].type = TAG_u; if (!safe_mul(bigval, Unit.val, &op->a[3].val)) { @@ -3375,8 +3375,8 @@ gen_get_binary2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live, } else { op->op = genop_i_bs_get_binary2_6; op->arity = 6; - op->a[0] = Fail; - op->a[1] = Ms; + op->a[0] = Ms; + op->a[1] = Fail; op->a[2] = Live; op->a[3] = Size; op->a[4].type = TAG_u; @@ -3541,8 +3541,8 @@ gen_get_float2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live, NATIVE_ENDIAN(Flags); op->op = genop_i_bs_get_float2_6; op->arity = 6; - op->a[0] = Fail; - op->a[1] = Ms; + op->a[0] = Ms; + op->a[1] = Fail; op->a[2] = Live; op->a[3] = Size; op->a[4].type = TAG_u; @@ -3601,9 +3601,9 @@ gen_skip_bits2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, } else { op->op = genop_i_bs_skip_bits2_4; op->arity = 4; - op->a[0] = Fail; - op->a[1] = Ms; - op->a[2] = Size; + op->a[0] = Ms; + op->a[1] = Size; + op->a[2] = Fail; op->a[3] = Unit; } op->next = NULL; diff --git a/erts/emulator/beam/bs_instrs.tab b/erts/emulator/beam/bs_instrs.tab index 2dde70c2e1..714f1d49ce 100644 --- a/erts/emulator/beam/bs_instrs.tab +++ b/erts/emulator/beam/bs_instrs.tab @@ -90,12 +90,22 @@ TEST_BIN_VHEAP(VNh, Nh, Live) { HEAP_SPACE_VERIFIED(need); } -i_bs_get_binary_all2(Fail, Ms, Live, Unit, Dst) { +i_bs_get_binary_all2 := i_bs_get_binary_all2.fetch.execute; + +i_bs_get_binary_all2.head() { + Eterm context; +} + +i_bs_get_binary_all2.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_binary_all2.execute(Fail, Live, Unit, Dst) { ErlBinMatchBuffer *_mb; Eterm _result; - $GC_TEST(0, ERL_SUB_BIN_SIZE, $Live); - _mb = ms_matchbuffer($Ms); + $GC_TEST_PRESERVE(ERL_SUB_BIN_SIZE, $Live, context); + _mb = ms_matchbuffer(context); if (((_mb->size - _mb->offset) % $Unit) == 0) { LIGHT_SWAPOUT; _result = erts_bs_get_binary_all_2(c_p, _mb); @@ -109,14 
+119,23 @@ i_bs_get_binary_all2(Fail, Ms, Live, Unit, Dst) { $FAIL($Fail); } } +i_bs_get_binary2 := i_bs_get_binary2.fetch.execute; + +i_bs_get_binary2.head() { + Eterm context; +} -i_bs_get_binary2(Fail, Ms, Live, Sz, Flags, Dst) { +i_bs_get_binary2.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_binary2.execute(Fail, Live, Sz, Flags, Dst) { ErlBinMatchBuffer *_mb; Eterm _result; Uint _size; $BS_GET_FIELD_SIZE($Sz, (($Flags) >> 3), $FAIL($Fail), _size); - $GC_TEST(0, ERL_SUB_BIN_SIZE, $Live); - _mb = ms_matchbuffer($Ms); + $GC_TEST_PRESERVE(ERL_SUB_BIN_SIZE, $Live, context); + _mb = ms_matchbuffer(context); LIGHT_SWAPOUT; _result = erts_bs_get_binary_2(c_p, _size, $Flags, _mb); LIGHT_SWAPIN; @@ -129,11 +148,22 @@ i_bs_get_binary2(Fail, Ms, Live, Sz, Flags, Dst) { } } -i_bs_get_binary_imm2(Fail, Ms, Live, Sz, Flags, Dst) { +i_bs_get_binary_imm2 := i_bs_get_binary_imm2.fetch.execute; + +i_bs_get_binary_imm2.head() { + Eterm context; +} + +i_bs_get_binary_imm2.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_binary_imm2.execute(Fail, Live, Sz, Flags, Dst) { ErlBinMatchBuffer *_mb; Eterm _result; - $GC_TEST(0, heap_bin_size(ERL_ONHEAP_BIN_LIMIT), $Live); - _mb = ms_matchbuffer($Ms); + $GC_TEST_PRESERVE(heap_bin_size(ERL_ONHEAP_BIN_LIMIT), + $Live, context); + _mb = ms_matchbuffer(context); LIGHT_SWAPOUT; _result = erts_bs_get_binary_2(c_p, $Sz, $Flags, _mb); LIGHT_SWAPIN; @@ -145,8 +175,17 @@ i_bs_get_binary_imm2(Fail, Ms, Live, Sz, Flags, Dst) { $Dst = _result; } } +i_bs_get_float2 := i_bs_get_float2.fetch.execute; + +i_bs_get_float2.head() { + Eterm context; +} + +i_bs_get_float2.fetch(Ctx) { + context = $Ctx; +} -i_bs_get_float2(Fail, Ms, Live, Sz, Flags, Dst) { +i_bs_get_float2.execute(Fail, Live, Sz, Flags, Dst) { ErlBinMatchBuffer *_mb; Eterm _result; Sint _size; @@ -155,8 +194,8 @@ i_bs_get_float2(Fail, Ms, Live, Sz, Flags, Dst) { $FAIL($Fail); } _size *= (($Flags) >> 3); - $GC_TEST(0, FLOAT_SIZE_OBJECT, $Live); - _mb = ms_matchbuffer($Ms); + 
$GC_TEST_PRESERVE(FLOAT_SIZE_OBJECT, $Live, context); + _mb = ms_matchbuffer(context); LIGHT_SWAPOUT; _result = erts_bs_get_float_2(c_p, _size, ($Flags), _mb); LIGHT_SWAPIN; @@ -169,13 +208,24 @@ i_bs_get_float2(Fail, Ms, Live, Sz, Flags, Dst) { } } -i_bs_skip_bits2(Fail, Ms, Bits, Unit) { +i_bs_skip_bits2 := i_bs_skip_bits2.fetch.execute; + +i_bs_skip_bits2.head() { + Eterm context, bits; +} + +i_bs_skip_bits2.fetch(Ctx, Bits) { + context = $Ctx; + bits = $Bits; +} + +i_bs_skip_bits2.execute(Fail, Unit) { ErlBinMatchBuffer *_mb; size_t new_offset; Uint _size; - _mb = ms_matchbuffer($Ms); - $BS_GET_FIELD_SIZE($Bits, $Unit, $FAIL($Fail), _size); + _mb = ms_matchbuffer(context); + $BS_GET_FIELD_SIZE(bits, $Unit, $FAIL($Fail), _size); new_offset = _mb->offset + _size; if (new_offset <= _mb->size) { _mb->offset = new_offset; @@ -809,9 +859,19 @@ bs_test_unit8(Fail, Ctx) { } } -i_bs_get_integer_8(Ctx, Fail, Dst) { +i_bs_get_integer_8 := i_bs_get_integer_8.fetch.execute; + +i_bs_get_integer_8.head() { + Eterm context; +} + +i_bs_get_integer_8.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_integer_8.execute(Fail, Dst) { Eterm _result; - ErlBinMatchBuffer* _mb = ms_matchbuffer($Ctx); + ErlBinMatchBuffer* _mb = ms_matchbuffer(context); if (_mb->size - _mb->offset < 8) { $FAIL($Fail); @@ -825,9 +885,19 @@ i_bs_get_integer_8(Ctx, Fail, Dst) { $Dst = _result; } -i_bs_get_integer_16(Ctx, Fail, Dst) { +i_bs_get_integer_16 := i_bs_get_integer_16.fetch.execute; + +i_bs_get_integer_16.head() { + Eterm context; +} + +i_bs_get_integer_16.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_integer_16.execute(Fail, Dst) { Eterm _result; - ErlBinMatchBuffer* _mb = ms_matchbuffer($Ctx); + ErlBinMatchBuffer* _mb = ms_matchbuffer(context); if (_mb->size - _mb->offset < 16) { $FAIL($Fail); @@ -842,9 +912,19 @@ i_bs_get_integer_16(Ctx, Fail, Dst) { } %if ARCH_64 -i_bs_get_integer_32(Ctx, Fail, Dst) { +i_bs_get_integer_32 := i_bs_get_integer_32.fetch.execute; + +i_bs_get_integer_32.head() { + Eterm 
context; +} + +i_bs_get_integer_32.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_integer_32.execute(Fail, Dst) { Uint32 _integer; - ErlBinMatchBuffer* _mb = ms_matchbuffer($Ctx); + ErlBinMatchBuffer* _mb = ms_matchbuffer(context); if (_mb->size - _mb->offset < 32) { $FAIL($Fail); @@ -894,15 +974,23 @@ bs_get_integer.execute(Fail, Flags, Dst) { $Dst = result; } -i_bs_get_integer(Fail, Live, FlagsAndUnit, Ms, Sz, Dst) { +i_bs_get_integer := i_bs_get_integer.fetch.execute; + +i_bs_get_integer.head() { + Eterm context; +} + +i_bs_get_integer.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_integer.execute(Fail, Live, FlagsAndUnit, Sz, Dst) { Uint flags; Uint size; - Eterm ms; ErlBinMatchBuffer* mb; Eterm result; flags = $FlagsAndUnit; - ms = $Ms; $BS_GET_FIELD_SIZE($Sz, (flags >> 3), $FAIL($Fail), size); if (size >= SMALL_BITS) { Uint wordsneeded; @@ -913,15 +1001,15 @@ i_bs_get_integer(Fail, Live, FlagsAndUnit, Ms, Sz, Dst) { * Remember to re-acquire the matchbuffer after gc. */ - mb = ms_matchbuffer(ms); + mb = ms_matchbuffer(context); if (mb->size - mb->offset < size) { $FAIL($Fail); } wordsneeded = 1+WSIZE(NBYTES((Uint) size)); - $GC_TEST_PRESERVE(wordsneeded, $Live, ms); + $GC_TEST_PRESERVE(wordsneeded, $Live, context); $REFRESH_GEN_DEST(); } - mb = ms_matchbuffer(ms); + mb = ms_matchbuffer(context); LIGHT_SWAPOUT; result = erts_bs_get_integer_2(c_p, size, flags, mb); LIGHT_SWAPIN; @@ -932,9 +1020,19 @@ i_bs_get_integer(Fail, Live, FlagsAndUnit, Ms, Sz, Dst) { $Dst = result; } -i_bs_get_utf8(Ctx, Fail, Dst) { +i_bs_get_utf8 := i_bs_get_utf8.fetch.execute; + +i_bs_get_utf8.head() { + Eterm context; +} + +i_bs_get_utf8.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_utf8.execute(Fail, Dst) { Eterm result; - ErlBinMatchBuffer* mb = ms_matchbuffer($Ctx); + ErlBinMatchBuffer* mb = ms_matchbuffer(context); if (mb->size - mb->offset < 8) { $FAIL($Fail); @@ -957,8 +1055,18 @@ i_bs_get_utf8(Ctx, Fail, Dst) { $Dst = result; } -i_bs_get_utf16(Ctx, Fail, Flags, Dst) { - 
ErlBinMatchBuffer* mb = ms_matchbuffer($Ctx); +i_bs_get_utf16 := i_bs_get_utf16.fetch.execute; + +i_bs_get_utf16.head() { + Eterm context; +} + +i_bs_get_utf16.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_utf16.execute(Fail, Flags, Dst) { + ErlBinMatchBuffer* mb = ms_matchbuffer(context); Eterm result = erts_bs_get_utf16(mb, $Flags); if (is_non_value(result)) { @@ -1055,13 +1163,20 @@ i_bs_restore2(Src, Slot) { _ms->mb.offset = _ms->save_offset[$Slot]; } -bs_get_tail(Src, Dst, Live) { - ErlBinMatchBuffer* mb; - Uint size, offs; - ErlSubBin* sb; +bs_get_tail := bs_get_tail.fetch.execute; + +bs_get_tail.head() { Eterm context; +} +bs_get_tail.fetch(Src) { context = $Src; +} + +bs_get_tail.execute(Dst, Live) { + ErlBinMatchBuffer* mb; + Uint size, offs; + ErlSubBin* sb; ASSERT(header_is_bin_matchstate(*boxed_val(context))); @@ -1090,11 +1205,20 @@ bs_get_tail(Src, Dst, Live) { %if ARCH_64 -i_bs_start_match3_gp(Src, Live, Fail, Dst, Pos) { - Eterm context, header; - Uint position, live; +i_bs_start_match3_gp := i_bs_start_match3_gp.fetch.execute; +i_bs_start_match3_gp.head() { + Eterm context; +} + +i_bs_start_match3_gp.fetch(Src) { context = $Src; +} + +i_bs_start_match3_gp.execute(Live, Fail, Dst, Pos) { + Eterm header; + Uint position, live; + live = $Live; if (!is_boxed(context)) { @@ -1139,11 +1263,20 @@ i_bs_start_match3_gp(Src, Live, Fail, Dst, Pos) { $Pos = make_small(position); } -i_bs_start_match3(Src, Live, Fail, Dst) { - Eterm context, header; - Uint live; +i_bs_start_match3 := i_bs_start_match3.fetch.execute; + +i_bs_start_match3.head() { + Eterm context; +} +i_bs_start_match3.fetch(Src) { context = $Src; +} + +i_bs_start_match3.execute(Live, Fail, Dst) { + Eterm header; + Uint live; + live = $Live; if (!is_boxed(context)) { @@ -1261,11 +1394,20 @@ bs_get_position(Ctx, Dst, Live) { } } -i_bs_start_match3(Src, Live, Fail, Dst) { - Eterm context, header; - Uint live; +i_bs_start_match3 := i_bs_start_match3.fetch.execute; +i_bs_start_match3.head() { + Eterm 
context; +} + +i_bs_start_match3.fetch(Src) { context = $Src; +} + +i_bs_start_match3.execute(Live, Fail, Dst) { + Eterm header; + Uint live; + live = $Live; if (!is_boxed(context)) { diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 969d2728a5..2f00b82c1e 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1160,7 +1160,7 @@ i_bs_get_integer_imm Ms Bits Live Fail Flags Y=y => \ i_bs_get_integer_small_imm xy W f? t x i_bs_get_integer_imm xy W t f? t x -i_bs_get_integer f? t t xy s d +i_bs_get_integer xy f? t t s d i_bs_get_integer_8 xy f? d i_bs_get_integer_16 xy f? d @@ -1172,9 +1172,9 @@ i_bs_get_integer_32 xy f? d bs_get_binary2 Fail=f Ms=xy Live=u Sz=sq Unit=u Flags=u Dst=d => \ gen_get_binary2(Fail, Ms, Live, Sz, Unit, Flags, Dst) -i_bs_get_binary_imm2 f? xy t W t d -i_bs_get_binary2 f xy t? s t d -i_bs_get_binary_all2 f? xy t t d +i_bs_get_binary_imm2 xy f? t W t d +i_bs_get_binary2 xy f t? s t d +i_bs_get_binary_all2 xy f? t t d i_bs_get_binary_all_reuse xy f? t # Fetching float from binaries. @@ -1183,7 +1183,7 @@ bs_get_float2 Fail=f Ms=xy Live=u Sz=s Unit=u Flags=u Dst=d => \ bs_get_float2 Fail=f Ms=x Live=u Sz=q Unit=u Flags=u Dst=d => jump Fail -i_bs_get_float2 f? xy t s t d +i_bs_get_float2 xy f? t s t d # Miscellanous @@ -1191,7 +1191,7 @@ bs_skip_bits2 Fail=f Ms=xy Sz=sq Unit=u Flags=u => \ gen_skip_bits2(Fail, Ms, Sz, Unit, Flags) i_bs_skip_bits_imm2 f? xy W -i_bs_skip_bits2 f? xy xy t +i_bs_skip_bits2 xy xy f? t i_bs_skip_bits_all2 f? xy t bs_test_tail2 Fail=f Ms=xy Bits=u==0 => bs_test_zero_tail2 Fail Ms -- cgit v1.2.3 From db9a338a0480067a6f05551ce62c33f3aaf1a08a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 4 Mar 2019 14:38:28 +0100 Subject: Optimize field size calculation on a 64-bit architecture On a 64-bit architecture, the size of any binary that would fit in the memory must fit in a small, so we can fail immediately if the size term is not a small. 
--- erts/emulator/beam/bs_instrs.tab | 44 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/bs_instrs.tab b/erts/emulator/beam/bs_instrs.tab index 714f1d49ce..4cf7faffb7 100644 --- a/erts/emulator/beam/bs_instrs.tab +++ b/erts/emulator/beam/bs_instrs.tab @@ -21,12 +21,50 @@ %if ARCH_64 BS_SAFE_MUL(A, B, Fail, Dst) { - Uint64 res = ($A) * ($B); - if (res / $B != $A) { + Uint a = $A; + Uint b = $B; + Uint res = a * b; + if (res / b != a) { $Fail; } $Dst = res; } + +BS_GET_FIELD_SIZE(Bits, Unit, Fail, Dst) { + if (is_small($Bits)) { + Uint uint_size; + Sint signed_size = signed_val($Bits); + if (signed_size < 0) { + $Fail; + } + uint_size = (Uint) signed_size; + $BS_SAFE_MUL(uint_size, $Unit, $Fail, $Dst); + } else { + /* + * On a 64-bit architecture, the size of any binary + * that would fit in the memory fits in a small. + */ + $Fail; + } +} + +BS_GET_UNCHECKED_FIELD_SIZE(Bits, Unit, Fail, Dst) { + if (is_small($Bits)) { + Uint uint_size; + Sint signed_size = signed_val($Bits); + if (signed_size < 0) { + $Fail; + } + uint_size = (Uint) signed_size; + $Dst = uint_size * $Unit; + } else { + /* + * On a 64-bit architecture, the size of any binary + * that would fit in the memory fits in a small. 
+ */ + $Fail; + } +} %else BS_SAFE_MUL(A, B, Fail, Dst) { Uint64 res = (Uint64)($A) * (Uint64)($B); @@ -35,7 +73,6 @@ BS_SAFE_MUL(A, B, Fail, Dst) { } $Dst = res; } -%endif BS_GET_FIELD_SIZE(Bits, Unit, Fail, Dst) { Sint signed_size; @@ -76,6 +113,7 @@ BS_GET_UNCHECKED_FIELD_SIZE(Bits, Unit, Fail, Dst) { } $Dst = uint_size * $Unit; } +%endif TEST_BIN_VHEAP(VNh, Nh, Live) { Uint need = $Nh; -- cgit v1.2.3 From b96e5bd87c6aaaf96fa9c6e3679d95df74d1a499 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 5 Mar 2019 05:44:48 +0100 Subject: Eliminate unused i_bs_skip_bits_all2 instruction Starting in OTP 19 (in commit 9504c0dd71d0), the compiler emits a test_unit instruction instead of a skip instruction at the end of binary. We can do the same replacement in the loader to get rid of the i_bs_skip_bits_all2 instruction. --- erts/emulator/beam/beam_load.c | 16 ++++++++++++++-- erts/emulator/beam/bs_instrs.tab | 10 ---------- erts/emulator/beam/ops.tab | 1 - 3 files changed, 14 insertions(+), 13 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 5aaba78de4..e3232f1beb 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -3565,10 +3565,22 @@ gen_skip_bits2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, NATIVE_ENDIAN(Flags); NEW_GENOP(stp, op); if (Size.type == TAG_a && Size.val == am_all) { - op->op = genop_i_bs_skip_bits_all2_3; + /* + * This kind of skip instruction will only be found in modules + * compiled before OTP 19. From OTP 19, the compiler generates + * a test_unit instruction of a bs_skip at the end of a + * binary. + * + * It is safe to replace the skip instruction with a test_unit + * instruction, because the position will never be used again. + * If the match context itself is used again, it will be used by + * a bs_restore2 instruction which will overwrite the position + * by one of the stored positions. 
+ */ + op->op = genop_bs_test_unit_3; op->arity = 3; op->a[0] = Fail; - op->a[1] = Ms; + op->a[1] = Ms; op->a[2] = Unit; } else if (Size.type == TAG_i) { op->op = genop_i_bs_skip_bits_imm2_3; diff --git a/erts/emulator/beam/bs_instrs.tab b/erts/emulator/beam/bs_instrs.tab index 4cf7faffb7..10f43cd786 100644 --- a/erts/emulator/beam/bs_instrs.tab +++ b/erts/emulator/beam/bs_instrs.tab @@ -272,16 +272,6 @@ i_bs_skip_bits2.execute(Fail, Unit) { } } -i_bs_skip_bits_all2(Fail, Ms, Unit) { - ErlBinMatchBuffer *_mb; - _mb = ms_matchbuffer($Ms); - if (((_mb->size - _mb->offset) % $Unit) == 0) { - _mb->offset = _mb->size; - } else { - $FAIL($Fail); - } -} - i_bs_skip_bits_imm2(Fail, Ms, Bits) { ErlBinMatchBuffer *_mb; size_t new_offset; diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 2f00b82c1e..4e7e5f5de1 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1192,7 +1192,6 @@ bs_skip_bits2 Fail=f Ms=xy Sz=sq Unit=u Flags=u => \ i_bs_skip_bits_imm2 f? xy W i_bs_skip_bits2 xy xy f? t -i_bs_skip_bits_all2 f? xy t bs_test_tail2 Fail=f Ms=xy Bits=u==0 => bs_test_zero_tail2 Fail Ms bs_test_tail2 Fail=f Ms=xy Bits=u => bs_test_tail_imm2 Fail Ms Bits -- cgit v1.2.3 From 405aca76c4bbc47352788858bf0c0749fb1f730d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 5 Mar 2019 12:28:57 +0100 Subject: sys.h: Check for overflow checking arithmetic builtins Let sys.h define HAVE_OVERFLOW_CHECK_BUILTINS if the compiler supports __builtin_mul_overflow() and the other overflow checking builtins. The test is intentionally made in sys.h and not as a configure test. On Windows, beam_emu.c is always compiled using gcc, but the other files are usually compiled with Microsoft's C compiler. With the test in the header file, HAVE_OVERFLOW_CHECK_BUILTINS will be defined when compiling beam_emu.c. 
--- erts/emulator/beam/sys.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'erts') diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index a69da4d762..a6312293cc 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -111,6 +111,23 @@ #endif #endif +/* + * Test for clang's convenient __has_builtin feature checking macro. + */ +#ifndef __has_builtin + #define __has_builtin(x) 0 +#endif + +/* + * Define HAVE_OVERFLOW_CHECK_BUILTINS if the overflow checking arithmetic + * builtins are available. + */ +#if ERTS_AT_LEAST_GCC_VSN__(5, 1, 0) +# define HAVE_OVERFLOW_CHECK_BUILTINS 1 +#elif __has_builtin(__builtin_mul_overflow) +# define HAVE_OVERFLOW_CHECK_BUILTINS 1 +#endif + #include "erl_misc_utils.h" /* -- cgit v1.2.3 From 9e18956fcb279d33ae00d82db7382b81bad7dcc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 5 Mar 2019 10:20:33 +0100 Subject: Optimize multiplication in binary matching instructions --- erts/emulator/beam/bs_instrs.tab | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'erts') diff --git a/erts/emulator/beam/bs_instrs.tab b/erts/emulator/beam/bs_instrs.tab index 10f43cd786..493ec10222 100644 --- a/erts/emulator/beam/bs_instrs.tab +++ b/erts/emulator/beam/bs_instrs.tab @@ -23,10 +23,17 @@ BS_SAFE_MUL(A, B, Fail, Dst) { Uint a = $A; Uint b = $B; - Uint res = a * b; + Uint res; +#ifdef HAVE_OVERFLOW_CHECK_BUILTINS + if (__builtin_mul_overflow(a, b, &res)) { + $Fail; + } +#else + res = a * b; if (res / b != a) { $Fail; } +#endif $Dst = res; } -- cgit v1.2.3 From 8eb6e937c4c5aa4c86142f37f1455637f7e8a20a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 5 Mar 2019 12:10:18 +0100 Subject: Optimize the '*' operator when multiplying two small integers --- erts/emulator/beam/arith_instrs.tab | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'erts') diff --git a/erts/emulator/beam/arith_instrs.tab 
b/erts/emulator/beam/arith_instrs.tab index 574fceec5b..5f23b2c168 100644 --- a/erts/emulator/beam/arith_instrs.tab +++ b/erts/emulator/beam/arith_instrs.tab @@ -116,6 +116,17 @@ increment.execute(IncrementVal, Dst) { i_times(Fail, Op1, Op2, Dst) { Eterm op1 = $Op1; Eterm op2 = $Op2; +#ifdef HAVE_OVERFLOW_CHECK_BUILTINS + if (ERTS_LIKELY(is_both_small(op1, op2))) { + Sint a = signed_val(op1); + Sint b = signed_val(op2); + Sint res; + if (ERTS_LIKELY(!__builtin_mul_overflow(a, b, &res) && IS_SSMALL(res))) { + $Dst = make_small(res); + $NEXT0(); + } + } +#endif $OUTLINED_ARITH_2($Fail, mixed_times, BIF_stimes_2, op1, op2, $Dst); } -- cgit v1.2.3 From 7128f182ac4051b45bb0f526d8983f5ada1e12f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 5 Mar 2019 16:37:08 +0100 Subject: Slightly optimize is_eq and is_ne --- erts/emulator/beam/instrs.tab | 12 ++++++++---- erts/emulator/beam/ops.tab | 8 ++++++-- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab index 5b81517359..1eb83b61f2 100644 --- a/erts/emulator/beam/instrs.tab +++ b/erts/emulator/beam/instrs.tab @@ -884,12 +884,16 @@ i_is_ne_exact_literal(Fail, Src, Literal) { } } -is_eq(Fail, X, Y) { - CMP_EQ_ACTION($X, $Y, $FAIL($Fail)); +is_eq(Fail, A, B) { + Eterm a = $A; + Eterm b = $B; + CMP_EQ_ACTION(a, b, $FAIL($Fail)); } -is_ne(Fail, X, Y) { - CMP_NE_ACTION($X, $Y, $FAIL($Fail)); +is_ne(Fail, A, B) { + Eterm a = $A; + Eterm b = $B; + CMP_NE_ACTION(a, b, $FAIL($Fail)); } is_lt(Fail, X, Y) { diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 4e7e5f5de1..ef26afc10a 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -489,9 +489,13 @@ is_ge f? c x is_ge f? s s %hot -is_eq f? s s +is_eq Fail=f Const=c Reg=xy => is_eq Fail Reg Const +is_eq Fail=f C1=c C2=c => move C1 x | is_eq Fail x C2 +is_eq f? S s -is_ne f? 
s s +is_ne Fail=f Const=c Reg=xy => is_ne Fail Reg Const +is_ne Fail=f C1=c C2=c => move C1 x | is_ne Fail x C2 +is_ne f? S s # # Putting tuples. -- cgit v1.2.3 From 2de437ef15d1cddf70a0553437b678f9bca5f35c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 6 Mar 2019 06:23:24 +0100 Subject: Slightly optimize binary construction Use S operands instead of s operands for a slight speed increase and reduction in code size of process_main(). Use micro instructions for frequently executed instructions. While at it, use safe multiplication in gen_put_integer() in beam_load.c. --- erts/emulator/beam/beam_load.c | 67 +++++++++++++++++++++++++------------ erts/emulator/beam/bs_instrs.tab | 72 ++++++++++++++++++++++++++++++---------- erts/emulator/beam/ops.tab | 49 +++++++++++++++++---------- 3 files changed, 132 insertions(+), 56 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index e3232f1beb..43adf7a5e0 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -3409,8 +3409,8 @@ gen_put_binary(LoaderState* stp, GenOpArg Fail,GenOpArg Size, if (Size.type == TAG_a && Size.val == am_all) { op->op = genop_i_new_bs_put_binary_all_3; op->arity = 3; - op->a[0] = Fail; - op->a[1] = Src; + op->a[0] = Src; + op->a[1] = Fail; op->a[2] = Unit; } else if (Size.type == TAG_i) { op->op = genop_i_new_bs_put_binary_imm_3; @@ -3420,10 +3420,33 @@ gen_put_binary(LoaderState* stp, GenOpArg Fail,GenOpArg Size, if (safe_mul(Size.val, Unit.val, &op->a[1].val)) { op->a[2] = Src; } else { + error: op->op = genop_badarg_1; op->arity = 1; op->a[0] = Fail; } + } else if (Size.type == TAG_q) { +#ifdef ARCH_64 + /* + * There is no way that this binary would fit in memory.
+ */ + goto error; +#else + Eterm big = stp->literals[Size.val].term; + Uint bigval; + Uint size; + + if (!term_to_Uint(big, &bigval) || + !safe_mul(bigval, Unit.val, &size)) { + goto error; + } + op->op = genop_i_new_bs_put_binary_imm_3; + op->arity = 3; + op->a[0] = Fail; + op->a[1].type = TAG_u; + op->a[1].val = size; + op->a[2] = Src; +#endif } else { op->op = genop_i_new_bs_put_binary_4; op->arity = 4; @@ -3448,11 +3471,8 @@ gen_put_integer(LoaderState* stp, GenOpArg Fail, GenOpArg Size, NATIVE_ENDIAN(Flags); /* Negative size must fail */ if (Size.type == TAG_i) { - op->op = genop_i_new_bs_put_integer_imm_4; - op->arity = 4; - op->a[0] = Fail; - op->a[1].type = TAG_u; - if (!safe_mul(Size.val, Unit.val, &op->a[1].val)) { + Uint size; + if (!safe_mul(Size.val, Unit.val, &size)) { error: op->op = genop_badarg_1; op->arity = 1; @@ -3460,26 +3480,31 @@ gen_put_integer(LoaderState* stp, GenOpArg Fail, GenOpArg Size, op->next = NULL; return op; } - op->a[1].val = Size.val * Unit.val; - op->a[2].type = Flags.type; - op->a[2].val = (Flags.val & 7); - op->a[3] = Src; + op->op = genop_i_new_bs_put_integer_imm_4; + op->arity = 4; + op->a[0] = Src; + op->a[1] = Fail; + op->a[2].type = TAG_u; + op->a[2].val = size; + op->a[3].type = Flags.type; + op->a[3].val = (Flags.val & 7); } else if (Size.type == TAG_q) { Eterm big = stp->literals[Size.val].term; Uint bigval; + Uint size; - if (!term_to_Uint(big, &bigval)) { + if (!term_to_Uint(big, &bigval) || + !safe_mul(bigval, Unit.val, &size)) { goto error; - } else { - op->op = genop_i_new_bs_put_integer_imm_4; - op->arity = 4; - op->a[0] = Fail; - op->a[1].type = TAG_u; - op->a[1].val = bigval * Unit.val; - op->a[2].type = Flags.type; - op->a[2].val = (Flags.val & 7); - op->a[3] = Src; } + op->op = genop_i_new_bs_put_integer_imm_4; + op->arity = 4; + op->a[0] = Src; + op->a[1] = Fail; + op->a[2].type = TAG_u; + op->a[2].val = size; + op->a[3].type = Flags.type; + op->a[3].val = (Flags.val & 7); } else { op->op = 
genop_i_new_bs_put_integer_4; op->arity = 4; diff --git a/erts/emulator/beam/bs_instrs.tab b/erts/emulator/beam/bs_instrs.tab index 493ec10222..652460a66d 100644 --- a/erts/emulator/beam/bs_instrs.tab +++ b/erts/emulator/beam/bs_instrs.tab @@ -292,15 +292,25 @@ i_bs_skip_bits_imm2(Fail, Ms, Bits) { } i_new_bs_put_binary(Fail, Sz, Flags, Src) { + Eterm sz = $Sz; Sint _size; - $BS_GET_UNCHECKED_FIELD_SIZE($Sz, (($Flags) >> 3), $BADARG($Fail), _size); + $BS_GET_UNCHECKED_FIELD_SIZE(sz, (($Flags) >> 3), $BADARG($Fail), _size); if (!erts_new_bs_put_binary(ERL_BITS_ARGS_2(($Src), _size))) { $BADARG($Fail); } } +i_new_bs_put_binary_all := i_new_bs_put_binary_all.fetch.execute; -i_new_bs_put_binary_all(Fail, Src, Unit) { - if (!erts_new_bs_put_binary_all(ERL_BITS_ARGS_2(($Src), ($Unit)))) { +i_new_bs_put_binary_all.head() { + Eterm src; +} + +i_new_bs_put_binary_all.fetch(Src) { + src = $Src; +} + +i_new_bs_put_binary_all.execute(Fail, Unit) { + if (!erts_new_bs_put_binary_all(ERL_BITS_ARGS_2(src, ($Unit)))) { $BADARG($Fail); } } @@ -312,9 +322,11 @@ i_new_bs_put_binary_imm(Fail, Sz, Src) { } i_new_bs_put_float(Fail, Sz, Flags, Src) { + Eterm sz = $Sz; + Eterm flags = $Flags; Sint _size; - $BS_GET_UNCHECKED_FIELD_SIZE($Sz, (($Flags) >> 3), $BADARG($Fail), _size); - if (!erts_new_bs_put_float(c_p, ($Src), _size, ($Flags))) { + $BS_GET_UNCHECKED_FIELD_SIZE(sz, (flags >> 3), $BADARG($Fail), _size); + if (!erts_new_bs_put_float(c_p, ($Src), _size, flags)) { $BADARG($Fail); } } @@ -326,15 +338,27 @@ i_new_bs_put_float_imm(Fail, Sz, Flags, Src) { } i_new_bs_put_integer(Fail, Sz, Flags, Src) { - Sint _size; - $BS_GET_UNCHECKED_FIELD_SIZE($Sz, (($Flags) >> 3), $BADARG($Fail), _size); - if (!erts_new_bs_put_integer(ERL_BITS_ARGS_3(($Src), _size, ($Flags)))) { - $BADARG($Fail); - } + Eterm sz = $Sz; + Eterm flags = $Flags; + Sint _size; + $BS_GET_UNCHECKED_FIELD_SIZE(sz, (flags >> 3), $BADARG($Fail), _size); + if (!erts_new_bs_put_integer(ERL_BITS_ARGS_3(($Src), _size, flags))) { + 
$BADARG($Fail); + } } -i_new_bs_put_integer_imm(Fail, Sz, Flags, Src) { - if (!erts_new_bs_put_integer(ERL_BITS_ARGS_3(($Src), ($Sz), ($Flags)))) { +i_new_bs_put_integer_imm := i_new_bs_put_integer_imm.fetch.execute; + +i_new_bs_put_integer_imm.head() { + Eterm src; +} + +i_new_bs_put_integer_imm.fetch(Src) { + src = $Src; +} + +i_new_bs_put_integer_imm.execute(Fail, Sz, Flags) { + if (!erts_new_bs_put_integer(ERL_BITS_ARGS_3(src, ($Sz), ($Flags)))) { $BADARG($Fail); } } @@ -1381,12 +1405,19 @@ i_bs_get_position(Ctx, Dst) { # match at a position beyond 16MB. # -bs_set_position(Ctx, Pos) { +bs_set_position := bs_set_position.fetch.execute; + +bs_set_position.head() { Eterm context, position; - ErlBinMatchState *ms; +} +bs_set_position.fetch(Ctx, Pos) { context = $Ctx; position = $Pos; +} + +bs_set_position.execute() { + ErlBinMatchState *ms; ASSERT(header_is_bin_matchstate(*boxed_val(context))); ms = (ErlBinMatchState*)boxed_val(context); @@ -1399,12 +1430,19 @@ bs_set_position(Ctx, Pos) { } } -bs_get_position(Ctx, Dst, Live) { - ErlBinMatchState *ms; +bs_get_position := bs_get_position.fetch.execute; + +bs_get_position.head() { Eterm context; - Uint position; +} +bs_get_position.fetch(Ctx) { context = $Ctx; +} + +bs_get_position.execute(Dst, Live) { + ErlBinMatchState *ms; + Uint position; ASSERT(header_is_bin_matchstate(*boxed_val(context))); ms = (ErlBinMatchState*)boxed_val(context); diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index ef26afc10a..e688c6996b 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1331,31 +1331,35 @@ i_bs_private_append j? t s S x bs_put_integer Fail=j Sz=sq Unit=u Flags=u Src=s => \ gen_put_integer(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_integer j? s t s -i_new_bs_put_integer_imm j? W t s +i_new_bs_put_integer j? S t s +i_new_bs_put_integer_imm xyc j? W t # # Utf8/utf16/utf32 support. 
(R12B-5) # -bs_utf8_size j Src=s Dst=d => i_bs_utf8_size Src Dst +bs_utf8_size j Src Dst=d => i_bs_utf8_size Src Dst +bs_utf16_size j Src Dst=d => i_bs_utf16_size Src Dst -i_bs_utf8_size s x +bs_put_utf8 Fail u Src => i_bs_put_utf8 Fail Src -bs_utf16_size j Src=s Dst=d => i_bs_utf16_size Src Dst - -i_bs_utf16_size s x - -bs_put_utf8 Fail u Src=s => i_bs_put_utf8 Fail Src +bs_put_utf32 Fail=j Flags=u Src=s => \ + i_bs_validate_unicode Fail Src | bs_put_integer Fail i=32 u=1 Flags Src -i_bs_put_utf8 j? s +i_bs_utf8_size S x +i_bs_utf16_size S x -bs_put_utf16 j? t s +i_bs_put_utf8 j? S +bs_put_utf16 j? t S -bs_put_utf32 Fail=j Flags=u Src=s => \ - i_bs_validate_unicode Fail Src | bs_put_integer Fail i=32 u=1 Flags Src +i_bs_validate_unicode j? S -i_bs_validate_unicode j? s +# Handle unoptimized code. +i_bs_utf8_size Src=c Dst => move Src x | i_bs_utf8_size x Dst +i_bs_utf16_size Src=c Dst => move Src x | i_bs_utf16_size x Dst +i_bs_put_utf8 Fail Src=c => move Src x | i_bs_put_utf8 Fail x +bs_put_utf16 Fail Flags Src=c => move Src x | bs_put_utf16 Fail Flags x +i_bs_validate_unicode Fail Src=c => move Src x | i_bs_validate_unicode Fail x # # Storing floats into binaries. @@ -1365,7 +1369,7 @@ bs_put_float Fail Sz=q Unit Flags Val => badarg Fail bs_put_float Fail=j Sz=s Unit=u Flags=u Src=s => \ gen_put_float(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_float j? s t s +i_new_bs_put_float j? S t s i_new_bs_put_float_imm j? W t s # @@ -1375,9 +1379,18 @@ i_new_bs_put_float_imm j? W t s bs_put_binary Fail=j Sz=s Unit=u Flags=u Src=s => \ gen_put_binary(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_binary j? s t s -i_new_bs_put_binary_imm j? W s -i_new_bs_put_binary_all j? s t +# In unoptimized code, the binary argument could be a literal. (In optimized code, +# there would be a bs_put_string instruction.) 
+i_new_bs_put_binary Fail Size Unit Lit=c => \ + move Lit x | i_new_bs_put_binary Fail Size Unit x +i_new_bs_put_binary_imm Fail Size Lit=c => \ + move Lit x | i_new_bs_put_binary_imm Fail Size x +i_new_bs_put_binary_all Lit=c Fail Unit => \ + move Lit x | i_new_bs_put_binary_all x Fail Unit + +i_new_bs_put_binary j? S t S +i_new_bs_put_binary_imm j? W S +i_new_bs_put_binary_all xy j? t # # Warning: The i_bs_put_string and i_new_bs_put_string instructions -- cgit v1.2.3