From 65d137fd73696604817a866a72a868a65498b500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 22 Aug 2017 07:14:38 +0200 Subject: Add missing -no_next for badarg instruction --- erts/emulator/beam/instrs.tab | 1 + 1 file changed, 1 insertion(+) diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab index d45da62d03..1af01e53bd 100644 --- a/erts/emulator/beam/instrs.tab +++ b/erts/emulator/beam/instrs.tab @@ -814,6 +814,7 @@ is_ge(Fail, X, Y) { badarg(Fail) { $BADARG($Fail); + //| -no_next; } badmatch(Src) { -- cgit v1.2.3 From 31f43558f0b6b1652b4d3be96fe83a616e070a0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 22 Aug 2017 07:15:55 +0200 Subject: beam_makeops: Remove unused subroutine save_c_code --- erts/emulator/utils/beam_makeops | 5 ----- 1 file changed, 5 deletions(-) diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index da69b13e87..f9e22444f3 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -278,11 +278,6 @@ my $c_code_block; my $c_code_loc; my @c_args; -sub save_c_code { - my($name,$block,$loc,@args) = @_; - -} - while (<>) { my($op_num); if ($in_c_code) { -- cgit v1.2.3 From 83d5a3cfb5e55e6877ddecf3d33dccf0d3197520 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 16 Aug 2017 07:03:13 +0200 Subject: beam_emu: Remove unused macros --- erts/emulator/beam/beam_emu.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index e5935f5f02..8ae5c23e0d 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -154,9 +154,6 @@ do { \ */ #define REG_TARGET_PTR(Target) (((Target) & 1) ? &yb(Target-1) : &xb(Target)) -#define REG_TARGET(Target) (*REG_TARGET_PTR(Target)) - -#define ISCATCHEND(instr) ((Eterm *) *(instr) == OpCode(catch_end_y)) /* * Special Beam instructions. @@ -318,10 +315,6 @@ void** beam_ops; #endif #define Arg(N) I[(N)+1] -#define Next(N) \ - I += (N) + 1; \ - ASSERT(VALID_INSTR(*I)); \ - Goto(*I) #define GetR(pos, tr) \ do { \ -- cgit v1.2.3 From a9ea42890b6ce8fa35aeba5c6de3a45f97802b23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 14 Aug 2017 15:39:41 +0200 Subject: arith_instrs.tab: Eliminate warning for uninitialized value Make it clear to GCC that shift_left_count is initialized. --- erts/emulator/beam/arith_instrs.tab | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/erts/emulator/beam/arith_instrs.tab b/erts/emulator/beam/arith_instrs.tab index 91fe21e161..b8fbd80dad 100644 --- a/erts/emulator/beam/arith_instrs.tab +++ b/erts/emulator/beam/arith_instrs.tab @@ -236,6 +236,7 @@ shift.setup_bsr(Src1, Src2) { Op1 = $Src1; Op2 = $Src2; BIF = BIF_bsr_2; + shift_left_count = 0; if (is_small(Op2)) { shift_left_count = -signed_val(Op2); } else if (is_big(Op2)) { @@ -245,8 +246,6 @@ shift.setup_bsr(Src1, Src2) { */ shift_left_count = make_small(bignum_header_is_neg(*big_val(Op2)) ? MAX_SMALL : MIN_SMALL); - } else { - shift_left_count = 0; } } @@ -254,6 +253,7 @@ shift.setup_bsl(Src1, Src2) { Op1 = $Src1; Op2 = $Src2; BIF = BIF_bsl_2; + shift_left_count = 0; if (is_small(Op2)) { shift_left_count = signed_val(Op2); } else if (is_big(Op2)) { @@ -271,8 +271,6 @@ shift.setup_bsl(Src1, Src2) { */ shift_left_count = MAX_SMALL; } - } else { - shift_left_count = 0; } } -- cgit v1.2.3 From acad85c9831452e5865f2bd3fdd31455b493f091 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 16 Aug 2017 11:41:40 +0200 Subject: Improve performance for bsl/bsr Eliminate the variable used for holding which BIF (bsl or bsr). It seems to improve performance slightly. --- erts/emulator/beam/arith_instrs.tab | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/erts/emulator/beam/arith_instrs.tab b/erts/emulator/beam/arith_instrs.tab index b8fbd80dad..a5c84d41d6 100644 --- a/erts/emulator/beam/arith_instrs.tab +++ b/erts/emulator/beam/arith_instrs.tab @@ -229,13 +229,11 @@ shift.head() { Sint ires; Eterm* bigp; Eterm tmp_big[2]; - Uint BIF; } shift.setup_bsr(Src1, Src2) { Op1 = $Src1; Op2 = $Src2; - BIF = BIF_bsr_2; shift_left_count = 0; if (is_small(Op2)) { shift_left_count = -signed_val(Op2); @@ -252,7 +250,6 @@ shift.setup_bsr(Src1, Src2) { shift.setup_bsl(Src1, Src2) { Op1 = $Src1; Op2 = $Src2; - BIF = BIF_bsl_2; shift_left_count = 0; if (is_small(Op2)) { shift_left_count = signed_val(Op2); @@ -279,8 +276,7 @@ shift.execute(Fail, Live, Dst) { ires = signed_val(Op1); if (shift_left_count == 0 || ires == 0) { if (is_not_integer(Op2)) { - c_p->freason = BADARITH; - $BIF_ERROR_ARITY_2($Fail, BIF, Op1, Op2); + goto shift_error; } if (ires == 0) { $Dst = Op1; @@ -309,8 +305,7 @@ shift.execute(Fail, Live, Dst) { } else if (is_big(Op1)) { if (shift_left_count == 0) { if (is_not_integer(Op2)) { - c_p->freason = BADARITH; - $BIF_ERROR_ARITY_2($Fail, BIF, Op1, Op2); + goto shift_error; } $Dst = Op1; $NEXT0(); @@ -367,8 +362,22 @@ shift.execute(Fail, Live, Dst) { /* * One or more non-integer arguments. */ + shift_error: c_p->freason = BADARITH; - $BIF_ERROR_ARITY_2($Fail, BIF, Op1, Op2); + if ($Fail) { + $FAIL($Fail); + } else { + reg[0] = Op1; + reg[1] = Op2; + SWAPOUT; + if (I[0] == (BeamInstr) OpCode(i_bsl_ssjId)) { + I = handle_error(c_p, I, reg, &bif_export[BIF_bsl_2]->info.mfa); + } else { + ASSERT(I[0] == (BeamInstr) OpCode(i_bsr_ssjId)); + I = handle_error(c_p, I, reg, &bif_export[BIF_bsr_2]->info.mfa); + } + goto post_error_handling; + } } i_int_bnot(Fail, Src, Live, Dst) { -- cgit v1.2.3 From 961f96e9c7c4ad0b64bc6c88700c6eedd30162b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 18 Aug 2017 15:50:30 +0200 Subject: beam_makeops: Prevent truncation when packing 'I' values BEAM_WIDE_MASK covered the 16 right-most bits, instead of the 32 right-most bits. This bug will bite us when we'll do more packing in the future. This bug has been harmless in the past. It has been used in test_heap and allocate instructions for the number of heap words needed. It would be theoretically possible to construct a program that would need 65536 or more heap words, but it is hard to imagine a practical use for such a program. (The program would have to build a tuple or list with at least one variable and the rest of the elements being literals.) --- erts/emulator/utils/beam_makeops | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index f9e22444f3..c3ff7bf31f 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -705,7 +705,7 @@ sub emulator_output { print "#if !defined(ARCH_64)\n"; print qq[ #error "64-bit architecture assumed, but ARCH_64 not defined"\n]; print "#endif\n"; - print "#define BEAM_WIDE_MASK 0xFFFFUL\n"; + print "#define BEAM_WIDE_MASK 0xFFFFFFFFUL\n"; print "#define BEAM_LOOSE_MASK 0xFFFFUL\n"; print "#define BEAM_TIGHT_MASK 0xFFFFUL\n"; print "#define BEAM_WIDE_SHIFT 32\n"; -- cgit v1.2.3 From f840659e84d16641ca4d821a5928fc8bd2a5c654 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 16 Aug 2017 16:09:05 +0200 Subject: beam_makeops: Add an additional sanity check If a type has a size in %arg_size, it should also have a defined pattern in %bit_type. --- erts/emulator/utils/beam_makeops | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index c3ff7bf31f..9775a01b89 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -228,6 +228,12 @@ $match_engine_ops{'TOP_fail'} = 1; sanity("tag '$tag': primitive tags must be named with lowercase letters") unless $tag =~ /^[a-z]$/; } + + foreach my $tag (keys %arg_size) { + defined $type_bit{$tag} or + sanity("the tag '$tag' has a size in %arg_size, " . + "but has no defined bit pattern"); + } } # -- cgit v1.2.3 From 231ccd91c27dd51fe0e1da3c4ca0484bfe88a018 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 16 Aug 2017 16:15:43 +0200 Subject: beam_makeops: Remove the unused aliases 'N' and 'U' I don't remember what they were used for, but they are certainly no longer used. --- erts/emulator/utils/beam_makeops | 2 -- 1 file changed, 2 deletions(-) diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index 9775a01b89..bbc487dd61 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -200,8 +200,6 @@ sub define_type_bit { define_type_bit('A', $type_bit{'u'}); define_type_bit('L', $type_bit{'u'}); define_type_bit('b', $type_bit{'u'}); - define_type_bit('N', $type_bit{'u'}); - define_type_bit('U', $type_bit{'u'}); define_type_bit('e', $type_bit{'u'}); define_type_bit('P', $type_bit{'u'}); define_type_bit('Q', $type_bit{'u'}); -- cgit v1.2.3 From 7f7905f653170daf8a185636329701486fec4ad8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 21 Aug 2017 06:34:20 +0200 Subject: Use the wait_timeout_{un}locked_int instructions The transformations were incorrect. --- erts/emulator/beam/beam_load.c | 14 +++++++------- erts/emulator/beam/ops.tab | 5 +++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index dcd312f54f..2ab91926ad 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -3420,7 +3420,7 @@ gen_literal_timeout(LoaderState* stp, GenOpArg Fail, GenOpArg Time) Sint timeout; NEW_GENOP(stp, op); - op->op = genop_wait_timeout_unlocked_2; + op->op = genop_wait_timeout_unlocked_int_2; op->next = NULL; op->arity = 2; op->a[0].type = TAG_u; @@ -3467,12 +3467,12 @@ gen_literal_timeout_locked(LoaderState* stp, GenOpArg Fail, GenOpArg Time) Sint timeout; NEW_GENOP(stp, op); - op->op = genop_wait_timeout_locked_2; + op->op = genop_wait_timeout_locked_int_2; op->next = NULL; op->arity = 2; - op->a[0] = Fail; - op->a[1].type = TAG_u; - + op->a[0].type = TAG_u; + op->a[1] = Fail; + if (Time.type == TAG_i && (timeout = Time.val) >= 0 && #if defined(ARCH_64) (timeout >> 32) == 0 @@ -3480,7 +3480,7 @@ gen_literal_timeout_locked(LoaderState* stp, GenOpArg Fail, GenOpArg Time) 1 #endif ) { - op->a[1].val = timeout; + op->a[0].val = timeout; #if !defined(ARCH_64) } else if (Time.type == TAG_q) { Eterm big; @@ -3494,7 +3494,7 @@ gen_literal_timeout_locked(LoaderState* stp, GenOpArg Fail, GenOpArg Time) } else { Uint u; (void) term_to_Uint(big, &u); - op->a[1].val = (BeamInstr) u; + op->a[0].val = (BeamInstr) u; } #endif } else { diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 92e67bb470..955eecd115 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -382,8 +382,8 @@ label L | wait_timeout Fail Src | smp_already_locked(L) => \ label L | wait_timeout_locked Src Fail wait_timeout Fail Src => wait_timeout_unlocked Src Fail -wait_timeout_unlocked Fail Src=aiq => gen_literal_timeout(Fail, Src) -wait_timeout_locked Fail Src=aiq => gen_literal_timeout_locked(Fail, Src) +wait_timeout_unlocked Src=aiq Fail => gen_literal_timeout(Fail, Src) +wait_timeout_locked Src=aiq Fail => gen_literal_timeout_locked(Fail, Src) label L | wait Fail | smp_already_locked(L) => label L | wait_locked Fail wait Fail => wait_unlocked Fail @@ -398,6 +398,7 @@ loop_rec_end f wait_locked f wait_unlocked f +# Note that a timeout value must fit in 32 bits. wait_timeout_unlocked_int I f wait_timeout_unlocked s f wait_timeout_locked_int I f -- cgit v1.2.3 From cbeeed095739223a425649f6085b6959ad905c83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 17 Aug 2017 06:49:22 +0200 Subject: beam_makeops: Introduce the new type 'W' (machine word) As a preparation for potentially improving packing in the future, we will need to make sure that packable types have a defined maximum size. The packer algorithm assumes that two 'I' operands can be packed into one 64-bit word, but there are instructions that use an 'I' operand to store a pointer. It only works because those instructions are not packed for other reasons. Introduce the 'W' type and use it for operands that don't fit in 32 bits. --- erts/emulator/beam/beam_debug.c | 15 +++++++------- erts/emulator/beam/beam_load.c | 33 +++++++++++++++++++++++------- erts/emulator/beam/ops.tab | 42 ++++++++++++++++++++------------------ erts/emulator/test/tuple_SUITE.erl | 7 +++++++ erts/emulator/utils/beam_makeops | 8 +++++--- 5 files changed, 68 insertions(+), 37 deletions(-) diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index afe87288ce..15b18f0667 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -522,12 +522,13 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) } ap++; break; - case 'I': /* Untagged integer. */ - case 't': + case 't': /* Untagged integers */ + case 'I': + case 'W': switch (op) { - case op_i_gc_bif1_jIsId: - case op_i_gc_bif2_jIIssd: - case op_i_gc_bif3_jIIssd: + case op_i_gc_bif1_jWsId: + case op_i_gc_bif2_jWIssd: + case op_i_gc_bif3_jWIssd: { const ErtsGcBif* p; BifFunction gcf = (BifFunction) *ap; @@ -672,8 +673,8 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) } } break; - case op_i_jump_on_val_xfII: - case op_i_jump_on_val_yfII: + case op_i_jump_on_val_xfIW: + case op_i_jump_on_val_yfIW: { int n; for (n = ap[-2]; n > 0; n--) { diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 2ab91926ad..98701db558 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2388,11 +2388,29 @@ load_code(LoaderState* stp) break; } break; - case 'I': /* Untagged integer (or pointer). */ - VerifyTag(stp, tag, TAG_u); - code[ci++] = tmp_op->a[arg].val; - break; - case 't': /* Small untagged integer -- can be packed. */ + case 't': /* Small untagged integer (16 bits) -- can be packed. */ + case 'I': /* Untagged integer (32 bits) -- can be packed. */ + case 'W': /* Untagged integer or pointer (machine word). */ +#ifdef DEBUG + switch (*sign) { + case 't': + if (tmp_op->a[arg].val >> 16 != 0) { + load_printf(__LINE__, stp, "value %lu of type 't' does not fit in 16 bits", + tmp_op->a[arg].val); + ASSERT(0); + } + break; +#ifdef ARCH_64 + case 'I': + if (tmp_op->a[arg].val >> 32 != 0) { + load_printf(__LINE__, stp, "value %lu of type 'I' does not fit in 32 bits", + tmp_op->a[arg].val); + ASSERT(0); + } + break; +#endif + } +#endif VerifyTag(stp, tag, TAG_u); code[ci++] = tmp_op->a[arg].val; break; @@ -2627,8 +2645,8 @@ load_code(LoaderState* stp) /* Remember offset for the on_load function. */ stp->on_load = ci; break; - case op_bs_put_string_II: - case op_i_bs_match_string_xfII: + case op_bs_put_string_WW: + case op_i_bs_match_string_xfWW: new_string_patch(stp, ci-1); break; @@ -2884,6 +2902,7 @@ gen_element(LoaderState* stp, GenOpArg Fail, GenOpArg Index, op->next = NULL; if (Index.type == TAG_i && Index.val > 0 && + Index.val <= ERTS_MAX_TUPLE_SIZE && (Tuple.type == TAG_x || Tuple.type == TAG_y)) { op->op = genop_i_fast_element_4; op->a[0] = Tuple; diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 955eecd115..09fad05ffd 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -167,7 +167,7 @@ i_select_tuple_arity2 xy f A A f f i_jump_on_val_zero xy f I -i_jump_on_val xy f I I +i_jump_on_val xy f I W get_list xy xy xy @@ -971,6 +971,8 @@ node x node y %hot +# Note: 'I' is sufficient because this instruction will only be used +# if the arity fits in 24 bits. i_fast_element xy j I d i_element xy j s d @@ -1026,7 +1028,7 @@ i_call_fun_last I P make_fun2 OldIndex=u => gen_make_fun2(OldIndex) %cold -i_make_fun I t +i_make_fun W t %hot is_function f xy @@ -1052,14 +1054,14 @@ i_bs_restore2 x I # Matching integers bs_match_string Fail Ms Bits Val => i_bs_match_string Ms Fail Bits Val -i_bs_match_string x f I I +i_bs_match_string x f W W # Fetching integers from binaries. bs_get_integer2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \ gen_get_integer2(Fail, Ms, Live, Sz, Unit, Flags, Dst) -i_bs_get_integer_small_imm x I f I x -i_bs_get_integer_imm x I I f I x +i_bs_get_integer_small_imm x W f t x +i_bs_get_integer_imm x W t f t x i_bs_get_integer f I I s s x i_bs_get_integer_8 x f x i_bs_get_integer_16 x f x @@ -1072,7 +1074,7 @@ i_bs_get_integer_32 x f x bs_get_binary2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \ gen_get_binary2(Fail, Ms, Live, Sz, Unit, Flags, Dst) -i_bs_get_binary_imm2 f x I I I x +i_bs_get_binary_imm2 f x t W t x i_bs_get_binary2 f x I s I x i_bs_get_binary_all2 f x I I x i_bs_get_binary_all_reuse x f I @@ -1090,14 +1092,14 @@ i_bs_get_float2 f x I s I x bs_skip_bits2 Fail=f Ms=x Sz=sq Unit=u Flags=u => \ gen_skip_bits2(Fail, Ms, Sz, Unit, Flags) -i_bs_skip_bits_imm2 f x I +i_bs_skip_bits_imm2 f x W i_bs_skip_bits2 f x xy I i_bs_skip_bits_all2 f x I bs_test_tail2 Fail=f Ms=x Bits=u==0 => bs_test_zero_tail2 Fail Ms bs_test_tail2 Fail=f Ms=x Bits=u => bs_test_tail_imm2 Fail Ms Bits bs_test_zero_tail2 f x -bs_test_tail_imm2 f x I +bs_test_tail_imm2 f x W bs_test_unit F Ms Unit=u==8 => bs_test_unit8 F Ms bs_test_unit f x I @@ -1154,9 +1156,9 @@ i_bs_init_fail xy j I x i_bs_init_fail_heap s I j I x -i_bs_init I I x +i_bs_init W t x -i_bs_init_heap I I I x +i_bs_init_heap W I t x bs_init_bits Fail Sz=o Words Regs Flags Dst => system_limit Fail @@ -1173,8 +1175,8 @@ i_bs_init_bits_fail xy j I x i_bs_init_bits_fail_heap s I j I x -i_bs_init_bits I I x -i_bs_init_bits_heap I I I x +i_bs_init_bits W t x +i_bs_init_bits_heap W I t x bs_add Fail S1=i==0 S2 Unit=u==1 D => move S2 D @@ -1199,7 +1201,7 @@ bs_put_integer Fail=j Sz=sq Unit=u Flags=u Src=s => \ gen_put_integer(Fail, Sz, Unit, Flags, Src) i_new_bs_put_integer j s I s -i_new_bs_put_integer_imm j I I s +i_new_bs_put_integer_imm j W t s # # Utf8/utf16/utf32 support. (R12B-5) @@ -1233,7 +1235,7 @@ bs_put_float Fail=j Sz=s Unit=u Flags=u Src=s => \ gen_put_float(Fail, Sz, Unit, Flags, Src) i_new_bs_put_float j s I s -i_new_bs_put_float_imm j I I s +i_new_bs_put_float_imm j W I s # # Storing binaries into binaries. @@ -1243,7 +1245,7 @@ bs_put_binary Fail=j Sz=s Unit=u Flags=u Src=s => \ gen_put_binary(Fail, Sz, Unit, Flags, Src) i_new_bs_put_binary j s I s -i_new_bs_put_binary_imm j I s +i_new_bs_put_binary_imm j W s i_new_bs_put_binary_all j s I # @@ -1252,7 +1254,7 @@ i_new_bs_put_binary_all j s I # Don't change the instruction format unless you change the loader too. # -bs_put_string I I +bs_put_string W W # # New floating point instructions (R8). @@ -1450,7 +1452,7 @@ gc_bif2 Fail Live u$bif:erlang:bxor/2 S1 S2 Dst => \ gc_bif1 Fail I u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src I Dst -i_increment rxy I I d +i_increment rxy W t d i_plus x xy j I d i_plus s s j I d @@ -1499,9 +1501,9 @@ gc_bif2 Fail I Bif S1 S2 Dst => \ gc_bif3 Fail I Bif S1 S2 S3 Dst => \ gen_guard_bif3(Fail, I, Bif, S1, S2, S3, Dst) -i_gc_bif1 j I s I d +i_gc_bif1 j W s I d -i_gc_bif2 j I I s s d +i_gc_bif2 j W I s s d ii_gc_bif3/7 @@ -1510,7 +1512,7 @@ ii_gc_bif3/7 ii_gc_bif3 Fail Bif Live S1 S2 S3 Dst => \ move S1 x | i_gc_bif3 Fail Bif Live S2 S3 Dst -i_gc_bif3 j I I s s d +i_gc_bif3 j W I s s d # # The following instruction is specially handled in beam_load.c diff --git a/erts/emulator/test/tuple_SUITE.erl b/erts/emulator/test/tuple_SUITE.erl index 79b681b4d1..baf41180e0 100644 --- a/erts/emulator/test/tuple_SUITE.erl +++ b/erts/emulator/test/tuple_SUITE.erl @@ -134,6 +134,13 @@ t_element(Config) when is_list(Config) -> {'EXIT', {badarg, _}} = (catch element(1, id(42))), {'EXIT', {badarg, _}} = (catch element(id(1.5), id({a,b}))), + %% Make sure that the loader does not reject the module when + %% huge literal index values are used. + {'EXIT', {badarg, _}} = (catch element((1 bsl 24)-1, id({a,b,c}))), + {'EXIT', {badarg, _}} = (catch element(1 bsl 24, id({a,b,c}))), + {'EXIT', {badarg, _}} = (catch element(1 bsl 32, id({a,b,c}))), + {'EXIT', {badarg, _}} = (catch element(1 bsl 64, id({a,b,c}))), + ok. get_elements([Element|Rest], Tuple, Pos) -> diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index bbc487dd61..4108138b51 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -149,8 +149,9 @@ my %arg_size = ('r' => 0, # x(0) - x register zero 'j' => 1, # either 'f' or 'p' 'e' => 1, # pointer to export entry 'L' => 0, # label - 'I' => 1, # untagged integer - 't' => 1, # untagged integer -- can be packed + 't' => 1, # untagged integer (12 bits) -- can be packed + 'I' => 1, # untagged integer (32 bits) -- can be packed + 'W' => 1, # untagged integer/pointer (one word) 'b' => 1, # pointer to bif 'A' => 1, # arity value 'P' => 1, # byte offset into tuple or stack @@ -195,8 +196,9 @@ sub define_type_bit { define_type_bit('j', $type_bit{'f'} | $type_bit{'p'}); # Aliases (for matching purposes). - define_type_bit('I', $type_bit{'u'}); define_type_bit('t', $type_bit{'u'}); + define_type_bit('I', $type_bit{'u'}); + define_type_bit('W', $type_bit{'u'}); define_type_bit('A', $type_bit{'u'}); define_type_bit('L', $type_bit{'u'}); define_type_bit('b', $type_bit{'u'}); -- cgit v1.2.3 From 234ef0eb9803d4c4cfb82848e3d3d14d89d1ddf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 18 Aug 2017 04:06:52 +0200 Subject: Make map update instruction functions indepedent of instruction format Having the helper functions for map update knowing all the details of operands for the instruction will make it difficult to make improvements such as better packing. --- erts/emulator/beam/beam_debug.c | 8 +++--- erts/emulator/beam/beam_emu.c | 59 +++++++++++++++------------------------ erts/emulator/beam/map_instrs.tab | 20 +++++++------ erts/emulator/beam/ops.tab | 8 +++--- 4 files changed, 42 insertions(+), 53 deletions(-) diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index 15b18f0667..06b9db2877 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -697,9 +697,9 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) break; case op_i_put_tuple_xI: case op_i_put_tuple_yI: - case op_new_map_dII: - case op_update_map_assoc_sdII: - case op_update_map_exact_jsdII: + case op_new_map_dtI: + case op_update_map_assoc_sdtI: + case op_update_map_exact_jsdtI: { int n = unpacked[-1]; @@ -719,7 +719,7 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) } } break; - case op_i_new_small_map_lit_dIq: + case op_i_new_small_map_lit_dtq: { Eterm *tp = tuple_val(unpacked[-1]); int n = arityval(*tp); diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 8ae5c23e0d..dc242229e5 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -399,12 +399,13 @@ static BeamInstr* apply_fun(Process* p, Eterm fun, Eterm args, Eterm* reg) NOINLINE; static Eterm new_fun(Process* p, Eterm* reg, ErlFunEntry* fe, int num_free) NOINLINE; -static Eterm new_map(Process* p, Eterm* reg, BeamInstr* I) NOINLINE; -static Eterm new_small_map_lit(Process* p, Eterm* reg, Uint* n_exp, BeamInstr* I) NOINLINE; -static Eterm update_map_assoc(Process* p, Eterm* reg, - Eterm map, BeamInstr* I) NOINLINE; -static Eterm update_map_exact(Process* p, Eterm* reg, - Eterm map, BeamInstr* I) NOINLINE; +static Eterm new_map(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* ptr) NOINLINE; +static Eterm new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal, + Uint live, BeamInstr* ptr) NOINLINE; +static Eterm update_map_assoc(Process* p, Eterm* reg, Uint live, + Uint n, BeamInstr* new_p) NOINLINE; +static Eterm update_map_exact(Process* p, Eterm* reg, Uint live, + Uint n, Eterm* new_p) NOINLINE; static Eterm get_map_element(Eterm map, Eterm key); static Eterm get_map_element_hash(Eterm map, Eterm key, Uint32 hx); @@ -2727,24 +2728,20 @@ do { \ static Eterm -new_map(Process* p, Eterm* reg, BeamInstr* I) +new_map(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* ptr) { - Uint n = Arg(3); Uint i; Uint need = n + 1 /* hdr */ + 1 /*size*/ + 1 /* ptr */ + 1 /* arity */; Eterm keys; Eterm *mhp,*thp; Eterm *E; - BeamInstr *ptr; flatmap_t *mp; ErtsHeapFactory factory; - ptr = &Arg(4); - if (n > 2*MAP_SMALL_MAP_LIMIT) { Eterm res; if (HeapWordsLeft(p) < n) { - erts_garbage_collect(p, n, reg, Arg(2)); + erts_garbage_collect(p, n, reg, live); } mhp = p->htop; @@ -2765,7 +2762,7 @@ new_map(Process* p, Eterm* reg, BeamInstr* I) } if (HeapWordsLeft(p) < need) { - erts_garbage_collect(p, need, reg, Arg(2)); + erts_garbage_collect(p, need, reg, live); } thp = p->htop; @@ -2788,24 +2785,20 @@ new_map(Process* p, Eterm* reg, BeamInstr* I) } static Eterm -new_small_map_lit(Process* p, Eterm* reg, Uint* n_exp, BeamInstr* I) +new_small_map_lit(Process* p, Eterm* reg, Eterm keys_literal, Uint live, BeamInstr* ptr) { - Eterm* keys = tuple_val(Arg(3)); + Eterm* keys = tuple_val(keys_literal); Uint n = arityval(*keys); Uint need = n + 1 /* hdr */ + 1 /*size*/ + 1 /* ptr */ + 1 /* arity */; Uint i; - BeamInstr *ptr; flatmap_t *mp; Eterm *mhp; Eterm *E; - *n_exp = n; - ptr = &Arg(4); - ASSERT(n <= MAP_SMALL_MAP_LIMIT); if (HeapWordsLeft(p) < need) { - erts_garbage_collect(p, need, reg, Arg(2)); + erts_garbage_collect(p, need, reg, live); } mhp = p->htop; @@ -2814,7 +2807,7 @@ new_small_map_lit(Process* p, Eterm* reg, Uint* n_exp, BeamInstr* I) mp = (flatmap_t *)mhp; mhp += MAP_HEADER_FLATMAP_SZ; mp->thing_word = MAP_HEADER_FLATMAP; mp->size = n; - mp->keys = Arg(3); + mp->keys = keys_literal; for (i = 0; i < n; i++) { GET_TERM(*ptr++, *mhp++); @@ -2826,9 +2819,8 @@ new_small_map_lit(Process* p, Eterm* reg, Uint* n_exp, BeamInstr* I) } static Eterm -update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) +update_map_assoc(Process* p, Eterm* reg, Uint live, Uint n, BeamInstr* new_p) { - Uint n; Uint num_old; Uint num_updates; Uint need; @@ -2838,12 +2830,12 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) Eterm* E; Eterm* old_keys; Eterm* old_vals; - BeamInstr* new_p; Eterm new_key; Eterm* kp; + Eterm map; - new_p = &Arg(4); - num_updates = Arg(3) / 2; + num_updates = n / 2; + map = reg[live]; if (is_not_flatmap(map)) { Uint32 hx; @@ -2873,7 +2865,7 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) */ if (num_old == 0) { - return new_map(p, reg, I); + return new_map(p, reg, live, n, new_p); } /* @@ -2883,8 +2875,6 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) need = 2*(num_old+num_updates) + 1 + MAP_HEADER_FLATMAP_SZ; if (HeapWordsLeft(p) < need) { - Uint live = Arg(2); - reg[live] = map; erts_garbage_collect(p, need, reg, live+1); map = reg[live]; old_mp = (flatmap_t *)flatmap_val(map); @@ -3031,9 +3021,8 @@ update_map_assoc(Process* p, Eterm* reg, Eterm map, BeamInstr* I) */ static Eterm -update_map_exact(Process* p, Eterm* reg, Eterm map, BeamInstr* I) +update_map_exact(Process* p, Eterm* reg, Uint live, Uint n, Eterm* new_p) { - Uint n; Uint i; Uint num_old; Uint need; @@ -3043,12 +3032,12 @@ update_map_exact(Process* p, Eterm* reg, Eterm map, BeamInstr* I) Eterm* E; Eterm* old_keys; Eterm* old_vals; - BeamInstr* new_p; Eterm new_key; + Eterm map; - new_p = &Arg(5); - n = Arg(4) / 2; /* Number of values to be updated */ + n /= 2; /* Number of values to be updated */ ASSERT(n > 0); + map = reg[live]; if (is_not_flatmap(map)) { Uint32 hx; @@ -3102,8 +3091,6 @@ update_map_exact(Process* p, Eterm* reg, Eterm map, BeamInstr* I) need = num_old + MAP_HEADER_FLATMAP_SZ; if (HeapWordsLeft(p) < need) { - Uint live = Arg(3); - reg[live] = map; erts_garbage_collect(p, need, reg, live+1); map = reg[live]; old_mp = (flatmap_t *)flatmap_val(map); diff --git a/erts/emulator/beam/map_instrs.tab b/erts/emulator/beam/map_instrs.tab index 30c3d7743f..bbb2f49b66 100644 --- a/erts/emulator/beam/map_instrs.tab +++ b/erts/emulator/beam/map_instrs.tab @@ -31,22 +31,24 @@ new_map(Dst, Live, N) { Eterm res; HEAVY_SWAPOUT; - res = new_map(c_p, reg, I-1); + res = new_map(c_p, reg, $Live, $N, $NEXT_INSTRUCTION); HEAVY_SWAPIN; $REFRESH_GEN_DEST(); $Dst = res; $NEXT($NEXT_INSTRUCTION+$N); } -i_new_small_map_lit(Dst, Live, Literal) { +i_new_small_map_lit(Dst, Live, Keys) { Eterm res; Uint n; + Eterm keys = $Keys; HEAVY_SWAPOUT; - res = new_small_map_lit(c_p, reg, &n, I-1); + res = new_small_map_lit(c_p, reg, keys, $Live, $NEXT_INSTRUCTION); HEAVY_SWAPIN; $REFRESH_GEN_DEST(); $Dst = res; + n = arityval(*tuple_val(keys)); $NEXT($NEXT_INSTRUCTION+n); } @@ -127,11 +129,11 @@ i_get_map_elements(Fail, Src, N) { update_map_assoc(Src, Dst, Live, N) { Eterm res; - Eterm map; + Uint live = $Live; - map = $Src; + reg[live] = $Src; HEAVY_SWAPOUT; - res = update_map_assoc(c_p, reg, map, I); + res = update_map_assoc(c_p, reg, live, $N, $NEXT_INSTRUCTION); HEAVY_SWAPIN; ASSERT(is_value(res)); $REFRESH_GEN_DEST(); @@ -141,11 +143,11 @@ update_map_assoc(Src, Dst, Live, N) { update_map_exact(Fail, Src, Dst, Live, N) { Eterm res; - Eterm map; + Uint live = $Live; - map = $Src; + reg[live] = $Src; HEAVY_SWAPOUT; - res = update_map_exact(c_p, reg, map, I); + res = update_map_exact(c_p, reg, live, $N, $NEXT_INSTRUCTION); HEAVY_SWAPIN; if (is_value(res)) { $REFRESH_GEN_DEST(); diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 09fad05ffd..deee1a4de9 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1357,10 +1357,10 @@ sorted_put_map_exact F Src Dst Live Size Rest=* => \ new_map Dst Live Size Rest=* | is_small_map_literal_keys(Size, Rest) => \ gen_new_small_map_lit(Dst, Live, Size, Rest) -new_map d I I -i_new_small_map_lit d I q -update_map_assoc s d I I -update_map_exact j s d I I +new_map d t I +i_new_small_map_lit d t q +update_map_assoc s d t I +update_map_exact j s d t I is_map Fail Lit=q | literal_is_map(Lit) => is_map Fail cq => jump Fail -- cgit v1.2.3 From 8f2646de9a74d82d8527782bdb58584c60e99742 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 17 Aug 2017 11:59:26 +0200 Subject: beam_makeops: Rewrite the packer, fixing several bugs The packer had several bugs and limitations. For instance, on a 32-bit Erlang virtual machine it would gladly pack three 't' values into one word even though it would be not safe. The rewritten version will be more careful how much it packs into each word. It will also be able to do packing for more instructions. --- erts/emulator/beam/beam_emu.c | 1 + erts/emulator/utils/beam_makeops | 259 +++++++++++++++++++++++++-------------- 2 files changed, 166 insertions(+), 94 deletions(-) diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index dc242229e5..badfe62aa9 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -238,6 +238,7 @@ void** beam_ops; PROCESS_MAIN_CHK_LOCKS((P)); \ ERTS_UNREQ_PROC_MAIN_LOCK((P)) +#define db(N) (N) #define tb(N) (N) #define xb(N) (*(Eterm *) (((unsigned char *)reg) + (N))) #define yb(N) (*(Eterm *) (((unsigned char *)E) + (N))) diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index 4108138b51..2a957d2c9d 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -54,11 +54,6 @@ $pack_mask[4] = ['BEAM_LOOSE_MASK', # Only for 64 bit wordsize 'BEAM_LOOSE_MASK', $WHOLE_WORD]; -# Mapping from packagable arguments to number of packed arguments per -# word. Initialized after the wordsize is known. - -my @args_per_word; - # There are two types of instructions: generic and specific. # The generic instructions are those generated by the Beam compiler. # Corresponding to each generic instruction, there is generally a @@ -264,15 +259,8 @@ if ($wordsize == 32) { # Initialize number of arguments per packed word. # -$args_per_word[2] = 2; -$args_per_word[3] = 3; -$args_per_word[4] = 2; -$args_per_word[5] = 3; -$args_per_word[6] = 3; - if ($wordsize == 64) { $pack_mask[3] = ['BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK', $WHOLE_WORD]; - $args_per_word[4] = 4; } # @@ -1240,7 +1228,7 @@ sub basic_generator { my $c_code_ref = $c_code{$name}; if ($hot and defined $c_code_ref) { - ($prefix, $pack_spec, @args) = do_pack(@args); + ($var_decls, $pack_spec, @args) = do_pack(@args); } # @@ -1254,7 +1242,14 @@ sub basic_generator { my($this_size) = $arg_size{$_}; SWITCH: { - /^pack:(\d):(.*)/ and do { + /^packed:d:(\d):(.*)/ and do { + $var_decls .= "Eterm dst = $2;\n" . + "Eterm* dst_ptr = REG_TARGET_PTR(dst);\n"; + push(@f, "*dst_ptr"); + $this_size = $1; + last SWITCH; + }; + /^packed:[a-zA-z]:(\d):(.*)/ and do { push(@f, $2); $this_size = $1; last SWITCH; @@ -1458,29 +1453,28 @@ sub expand_macro { sub do_pack { my(@args) = @_; my($packable_args) = 0; - my @is_packable; # Packability (boolean) for each argument. - my $wide_packing = 0; - my(@orig_args) = @args; + my @bits_needed; # Bits needed for each argument. # - # Count the number of packable arguments. If we encounter any 's' or 'd' - # arguments, packing is not possible. + # Define the minimum number of bits needed for the packable argument types. + # + my %bits_needed = ('x' => 10, + 'y' => 10, + 'Q' => 10, + 'l' => 10, + 'd' => 16, + 't' => 16); + if ($wordsize == 64) { + $bits_needed{'I'} = 32; + } + + # + # Count the number of packable arguments. # - my $packable_types = "xytQ"; foreach my $arg (@args) { - if ($arg =~ /^[$packable_types]/) { + if (defined $bits_needed{$arg}) { $packable_args++; - push @is_packable, 1; - } elsif ($arg =~ /^I/ and $wordsize == 64 and $packable_args < 2) { - $wide_packing = 1; - push @is_packable, 1; - if (++$packable_args == 2) { - # We can only pack two arguments. Turn off packing - # for the rest of the arguments. - $packable_types = "\xFF"; - } - } elsif ($arg =~ /^[sd]/) { - return ('', '', @args); + push @bits_needed, $bits_needed{$arg}; } elsif ($arg =~ /^[scq]/ and $packable_args > 0) { # When packing, this operand will be picked up from the # code array, put onto the packing stack, and later put @@ -1494,87 +1488,164 @@ sub do_pack { # just turn off packing. return ('', '', @args); } else { - push @is_packable, 0; + push @bits_needed, 0; } } # - # Get out of here if too few or too many arguments. + # Nothing to pack unless there are at least 2 packable arguments. # return ('', '', @args) if $packable_args < 2; - my($size) = 0; - my($pack_prefix) = ''; - my($down) = ''; # Pack commands (towards instruction + # + # Determine how many arguments we should pack into each word. + # + my @args_per_word; + my @need_wide_mask; + my $bits = 0; + my $word = 0; + $args_per_word[0] = 0; + $need_wide_mask[0] = 0; + for (my $i = 0; $i < @args; $i++) { + if ($bits_needed[$i]) { + my $needed = $bits_needed[$i]; + + my $next_word = sub { + $word++; + $args_per_word[$word] = 0; + $need_wide_mask[$word] = 0; + $bits = 0; + }; + + if ($bits+$needed > $wordsize) { # Does not fit. + $next_word->(); + } + if ($args_per_word[$word] == 4) { # Can't handle more than 4 args. + $next_word->(); + } + if ($needed == 32 and $args_per_word[$word] > 1) { + # Must only pack two arguments in this word, and there + # are already at least two arguments here. + $next_word->(); + } + $args_per_word[$word]++; + $bits += $needed; + if ($needed == 32) { + $need_wide_mask[$word]++; + } + if ($need_wide_mask[$word] and $bits > 32) { + # Can only pack two things in a word where one + # item is 32 bits. Force the next item into + # the next word. + $bits = $wordsize; + } + } + } + + # + # Try to balance packing between words. + # + if ($args_per_word[$#args_per_word] == 1) { + if ($args_per_word[$#args_per_word-1] < 3) { + pop @args_per_word; + } else { + $args_per_word[$#args_per_word-1]--; + $args_per_word[$#args_per_word]++; + } + } elsif (@args_per_word == 2 and + $args_per_word[0] == 4 and + $args_per_word[1] == 2) { + $args_per_word[0] = 3; + $args_per_word[1] = 3; + } elsif (@args_per_word == 2 and + $args_per_word[0] == 3 and + $args_per_word[1] == 1) { + $args_per_word[0] = 2; + $args_per_word[1] = 2; + } + + my $size = 0; + my $pack_prefix = ''; + my $down = ''; # Pack commands (towards instruction # beginning). - my($up) = ''; # Pack commands (storing back while + my $up = ''; # Pack commands (storing back while # moving forward). + my $did_some_packing = 0; # Nothing packed yet. - my $args_per_word = $args_per_word[$packable_args]; - my @shift; - my @mask; - my @instr; + # Skip an unpackable argument. + my $skip_unpackable = sub { + my($arg) = @_; - if ($wide_packing) { - @shift = ('0', 'BEAM_WIDE_SHIFT'); - @mask = ('BEAM_WIDE_MASK', $WHOLE_WORD); - @instr = ('w', 'i'); - } else { - @shift = @{$pack_shift[$args_per_word]}; - @mask = @{$pack_mask[$args_per_word]}; - @instr = @{$pack_instr[$args_per_word]}; - } + if ($arg_size{$arg} and $did_some_packing) { + # Save the argument on the pack engine's stack. + $down = "g${down}"; + $up = "${up}p"; + } else { + # The argument has either zero size (e.g. r(0)), + # or is to the left of the first packed argument + # and will never be accessed. No need to do + # anything. + } + }; # # Now generate the packing instructions. One complication is that # the packing engine works from right-to-left, but we must generate # the instructions from left-to-right because we must calculate # instruction sizes from left-to-right. - # - # XXX Packing 3 't's in one word won't work. Sorry. - my $did_some_packing = 0; # Nothing packed yet. - my($ap) = 0; # Argument number within word. - my($tmpnum) = 1; # Number of temporary variable. - my($expr) = ''; - for (my $i = 0; $i < @args; $i++) { - my($reg) = $args[$i]; - my($this_size) = $arg_size{$reg}; - if ($is_packable[$i]) { - $this_size = 0; - $did_some_packing = 1; - - if ($ap == 0) { - $pack_prefix .= "Eterm tmp_packed$tmpnum = Arg($size);\n"; - $up .= "p"; - $down = "P$down"; - $this_size = 1; - } + my $arg_num = 0; + for (my $word = 0; $word < @args_per_word; $word++) { + my $ap = 0; # Argument number within word. + my $packed_var = "tmp_packed" . ($word+1); + my $args_per_word = $args_per_word[$word]; + my @shift; + my @mask; + my @instr; + + if ($need_wide_mask[$word]) { + @shift = ('0', 'BEAM_WIDE_SHIFT'); + @mask = ('BEAM_WIDE_MASK', $WHOLE_WORD); + @instr = ('w', 'i'); + } else { + @shift = @{$pack_shift[$args_per_word]}; + @mask = @{$pack_mask[$args_per_word]}; + @instr = @{$pack_instr[$args_per_word]}; + } - $down = "$instr[$ap]$down"; - my($unpack) = make_unpack($tmpnum, $shift[$ap], $mask[$ap]); - $args[$i] = "pack:$this_size:$reg" . "b($unpack)"; + while ($ap < $args_per_word) { + my $reg = $args[$arg_num]; + my $this_size = $arg_size{$reg}; + if ($bits_needed[$arg_num]) { + $this_size = 0; + $did_some_packing = 1; + + if ($ap == 0) { + $pack_prefix .= "Eterm $packed_var = Arg($size);\n"; + $up .= "p"; + $down = "P$down"; + $this_size = 1; + } - if (++$ap == $args_per_word) { - $ap = 0; - $tmpnum++; - } - } elsif ($arg_size{$reg} && $did_some_packing) { - # - # This is an argument that can't be packed. Normally, we must - # save it on the pack engine's stack, unless: - # - # 1. The argument has zero size (e.g. r(0)). Such arguments - # will not be loaded. They disappear. - # 2. If the argument is on the left of the first packed argument, - # the packing engine will never access it (because the engine - # operates from right-to-left). - # + $down = "$instr[$ap]$down"; + my $unpack = make_unpack($packed_var, $shift[$ap], $mask[$ap]); + $args[$arg_num] = "packed:$reg:$this_size:$reg" . "b($unpack)"; - $down = "g${down}"; - $up = "${up}p"; - } - $size += $this_size; + $ap++; + } else { + $skip_unpackable->($reg); + } + $size += $this_size; + $arg_num++; + } + } + + # + # Skip any unpackable arguments at the end. + # + while ($arg_num < @args) { + $skip_unpackable->($args[$arg_num]); + $arg_num++; } my $pack_spec = $down . $up; @@ -1582,9 +1653,9 @@ sub do_pack { } sub make_unpack { - my($tmpnum, $shift, $mask) = @_; + my($packed_var, $shift, $mask) = @_; - my($e) = "tmp_packed$tmpnum"; + my $e = $packed_var; $e = "($e>>$shift)" if $shift; $e .= "&$mask" unless $mask eq $WHOLE_WORD; $e; -- cgit v1.2.3 From 0848894878ef90b1c6d81e4a6e2741508d0b99f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 18 Aug 2017 08:27:51 +0200 Subject: Pack instructions using 'q', 'c', and 's' Update the pack engine to safely push literal operands to the pack stack and to safely pop them back to another code address. That will allow packing of more instructions. --- erts/emulator/beam/beam_load.c | 39 +++++++++++++++++++++++++++++++-------- erts/emulator/utils/beam_makeops | 12 ------------ 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 98701db558..982f998ae3 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2495,16 +2495,32 @@ load_code(LoaderState* stp) * The packing engine. */ if (opc[stp->specific_op].pack[0]) { - char* prog; /* Program for packing engine. */ - BeamInstr stack[8]; /* Stack. */ - BeamInstr* sp = stack; /* Points to next free position. */ - BeamInstr packed = 0; /* Accumulator for packed operations. */ + char* prog; /* Program for packing engine. */ + struct pack_stack { + BeamInstr instr; + LiteralPatch* patch; + } stack[8]; /* Stack. */ + struct pack_stack* sp = stack; /* Points to next free position. */ + BeamInstr packed = 0; /* Accumulator for packed operations. */ for (prog = opc[stp->specific_op].pack; *prog; prog++) { switch (*prog) { case 'g': /* Get instruction; push on stack. */ - *sp++ = code[--ci]; - break; + { + LiteralPatch* lp; + + ci--; + sp->instr = code[ci]; + sp->patch = 0; + for (lp = stp->literal_patches; lp && lp->pos > ci-MAX_OPARGS; lp = lp->next) { + if (lp->pos == ci) { + sp->patch = lp; + break; + } + } + sp++; + } + break; case 'i': /* Initialize packing accumulator. */ packed = code[--ci]; break; @@ -2520,10 +2536,17 @@ load_code(LoaderState* stp) break; #endif case 'p': /* Put instruction (from stack). */ - code[ci++] = *--sp; + --sp; + code[ci] = sp->instr; + if (sp->patch) { + sp->patch->pos = ci; + } + ci++; break; case 'P': /* Put packed operands. */ - *sp++ = packed; + sp->instr = packed; + sp->patch = 0; + sp++; packed = 0; break; default: diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index 2a957d2c9d..5f8395a289 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -1475,18 +1475,6 @@ sub do_pack { if (defined $bits_needed{$arg}) { $packable_args++; push @bits_needed, $bits_needed{$arg}; - } elsif ($arg =~ /^[scq]/ and $packable_args > 0) { - # When packing, this operand will be picked up from the - # code array, put onto the packing stack, and later put - # back into a different location in the code. The problem - # is that if this operand is a literal, the original - # location in the code would have been remembered in a - # literal patch. For packing to work, we would have to - # adjust the position in the literal patch. For the - # moment, adding additional instructions to the packing - # engine to handle this does not seem worth it, so we will - # just turn off packing. - return ('', '', @args); } else { push @bits_needed, 0; } -- cgit v1.2.3 From 5725b59f0c14a3bdc665f8543a30a8a5985a3d3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 18 Aug 2017 09:03:05 +0200 Subject: Pack cold instructions too Cold instructions used to be cooler (less frequently executed), so it did not seem worthwhile to pack their operands. Now bit syntax instructions are included among the cold instructions, and they are frequently used. --- erts/emulator/utils/beam_makeops | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index 5f8395a289..fcbd3de4c1 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -578,7 +578,7 @@ sub emulator_output { # for the emulator. # my($size, $code, $pack) = - basic_generator($name, $hot, '', 0, undef, @args); + basic_generator($name, 1, '', 0, undef, @args); # # Save the generated $code for later. -- cgit v1.2.3 From 8fc304e1e0c4a36dcb5abbe7a51de63ddc2cb285 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 21 Aug 2017 07:18:49 +0200 Subject: Introduce more packable types The 'I' type can be replaced with 't' if we know that the value will fit in 16 bits. The 'P' type can be replaced with 'Q' if it is used for deallocating a stack frame. --- erts/emulator/beam/arith_instrs.tab | 4 +- erts/emulator/beam/beam_debug.c | 6 +-- erts/emulator/beam/ops.tab | 82 ++++++++++++++++++------------------- 3 files changed, 46 insertions(+), 46 deletions(-) diff --git a/erts/emulator/beam/arith_instrs.tab b/erts/emulator/beam/arith_instrs.tab index a5c84d41d6..224945287b 100644 --- a/erts/emulator/beam/arith_instrs.tab +++ b/erts/emulator/beam/arith_instrs.tab @@ -370,10 +370,10 @@ shift.execute(Fail, Live, Dst) { reg[0] = Op1; reg[1] = Op2; SWAPOUT; - if (I[0] == (BeamInstr) OpCode(i_bsl_ssjId)) { + if (I[0] == (BeamInstr) OpCode(i_bsl_ssjtd)) { I = handle_error(c_p, I, reg, &bif_export[BIF_bsl_2]->info.mfa); } else { - ASSERT(I[0] == (BeamInstr) OpCode(i_bsr_ssjId)); + ASSERT(I[0] == (BeamInstr) OpCode(i_bsr_ssjtd)); I = handle_error(c_p, I, reg, &bif_export[BIF_bsr_2]->info.mfa); } goto post_error_handling; diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index 06b9db2877..49414ae8fc 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -526,9 +526,9 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr) case 'I': case 'W': switch (op) { - case op_i_gc_bif1_jWsId: - case op_i_gc_bif2_jWIssd: - case op_i_gc_bif3_jWIssd: + case op_i_gc_bif1_jWstd: + case op_i_gc_bif2_jWtssd: + case op_i_gc_bif3_jWtssd: { const ErtsGcBif* p; BifFunction gcf = (BifFunction) *ap; diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index deee1a4de9..4f79177c89 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -108,7 +108,7 @@ allocate_zero t t allocate_heap_zero t I t trim N Remaining => i_trim N -i_trim I +i_trim t test_heap I t @@ -590,7 +590,7 @@ is_integer Fail Literal=q => move Literal x | is_integer Fail x is_integer Fail=f S=x | allocate Need Regs => is_integer_allocate Fail S Need Regs -is_integer_allocate f x I I +is_integer_allocate f x t t is_integer f xy @@ -609,7 +609,7 @@ is_nonempty_list F=f x==0 | test_heap I1 I2 => is_nonempty_list_test_heap F I1 I is_nonempty_list Fail=f S=x | get_list S D1=x D2=x => \ is_nonempty_list_get_list Fail S D1 D2 -is_nonempty_list_allocate f rx I t +is_nonempty_list_allocate f rx t t is_nonempty_list_test_heap f I t is_nonempty_list_get_list f rx x x is_nonempty_list f xy @@ -685,7 +685,7 @@ is_function2 f s s allocate Need Regs | init Y => allocate_init Need Regs Y init Y1 | init Y2 => init2 Y1 Y2 -allocate_init t I y +allocate_init t t y ################################################################# # External function and bif calls. @@ -1006,15 +1006,15 @@ call_last Ar Func D => i_call_last Func D call_only Ar Func => i_call_only Func i_call f -i_call_last f P +i_call_last f Q i_call_only f i_call_ext e -i_call_ext_last e P +i_call_ext_last e Q i_call_ext_only e i_move_call_ext c e -i_move_call_ext_last e P c +i_move_call_ext_last e Q c i_move_call_ext_only e c # Fun calls. @@ -1022,8 +1022,8 @@ i_move_call_ext_only e c call_fun Arity | deallocate D | return => i_call_fun_last Arity D call_fun Arity => i_call_fun Arity -i_call_fun I -i_call_fun_last I P +i_call_fun t +i_call_fun_last t Q make_fun2 OldIndex=u => gen_make_fun2(OldIndex) @@ -1152,9 +1152,9 @@ bs_init2 Fail Sz Words=u==0 Regs Flags Dst => \ bs_init2 Fail Sz Words Regs Flags Dst => \ i_bs_init_fail_heap Sz Words Fail Regs Dst -i_bs_init_fail xy j I x +i_bs_init_fail xy j t x -i_bs_init_fail_heap s I j I x +i_bs_init_fail_heap s I j t x i_bs_init W t x @@ -1171,16 +1171,16 @@ bs_init_bits Fail Sz Words=u==0 Regs Flags Dst => \ bs_init_bits Fail Sz Words Regs Flags Dst => \ i_bs_init_bits_fail_heap Sz Words Fail Regs Dst -i_bs_init_bits_fail xy j I x +i_bs_init_bits_fail xy j t x -i_bs_init_bits_fail_heap s I j I x +i_bs_init_bits_fail_heap s I j t x i_bs_init_bits W t x i_bs_init_bits_heap W I t x bs_add Fail S1=i==0 S2 Unit=u==1 D => move S2 D -bs_add j s s I x +bs_add j s s t x bs_append Fail Size Extra Live Unit Bin Flags Dst => \ move Bin x | i_bs_append Fail Extra Live Unit Size Dst @@ -1190,8 +1190,8 @@ bs_private_append Fail Size Unit Bin Flags Dst => \ bs_init_writable -i_bs_append j I I I s x -i_bs_private_append j I s s x +i_bs_append j I t t s x +i_bs_private_append j t s s x # # Storing integers into binaries. @@ -1200,7 +1200,7 @@ i_bs_private_append j I s s x bs_put_integer Fail=j Sz=sq Unit=u Flags=u Src=s => \ gen_put_integer(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_integer j s I s +i_new_bs_put_integer j s t s i_new_bs_put_integer_imm j W t s # @@ -1219,7 +1219,7 @@ bs_put_utf8 Fail u Src=s => i_bs_put_utf8 Fail Src i_bs_put_utf8 j s -bs_put_utf16 j I s +bs_put_utf16 j t s bs_put_utf32 Fail=j Flags=u Src=s => \ i_bs_validate_unicode Fail Src | bs_put_integer Fail i=32 u=1 Flags Src @@ -1234,8 +1234,8 @@ bs_put_float Fail Sz=q Unit Flags Val => badarg Fail bs_put_float Fail=j Sz=s Unit=u Flags=u Src=s => \ gen_put_float(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_float j s I s -i_new_bs_put_float_imm j W I s +i_new_bs_put_float j s t s +i_new_bs_put_float_imm j W t s # # Storing binaries into binaries. @@ -1244,9 +1244,9 @@ i_new_bs_put_float_imm j W I s bs_put_binary Fail=j Sz=s Unit=u Flags=u Src=s => \ gen_put_binary(Fail, Sz, Unit, Flags, Src) -i_new_bs_put_binary j s I s +i_new_bs_put_binary j s t s i_new_bs_put_binary_imm j W s -i_new_bs_put_binary_all j s I +i_new_bs_put_binary_all j s t # # Warning: The i_bs_put_string and i_new_bs_put_string instructions @@ -1298,8 +1298,8 @@ fclearerror # New apply instructions in R10B. # -apply I -apply_last I P +apply t +apply_last t Q # # Handle compatibility with OTP 17 here. @@ -1454,30 +1454,30 @@ gc_bif1 Fail I u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src I Dst i_increment rxy W t d -i_plus x xy j I d -i_plus s s j I d +i_plus x xy j t d +i_plus s s j t d -i_minus x x j I d -i_minus s s j I d +i_minus x x j t d +i_minus s s j t d -i_times j I s s d +i_times j t s s d -i_m_div j I s s d -i_int_div j I s s d +i_m_div j t s s d +i_int_div j t s s d -i_rem x x j I d -i_rem s s j I d +i_rem x x j t d +i_rem s s j t d -i_bsl s s j I d -i_bsr s s j I d +i_bsl s s j t d +i_bsr s s j t d -i_band x c j I d -i_band s s j I d +i_band x c j t d +i_band s s j t d i_bor j I s s d i_bxor j I s s d -i_int_bnot j s I d +i_int_bnot j s t d # # Old guard BIFs that creates heap fragments are no longer allowed. @@ -1501,9 +1501,9 @@ gc_bif2 Fail I Bif S1 S2 Dst => \ gc_bif3 Fail I Bif S1 S2 S3 Dst => \ gen_guard_bif3(Fail, I, Bif, S1, S2, S3, Dst) -i_gc_bif1 j W s I d +i_gc_bif1 j W s t d -i_gc_bif2 j W I s s d +i_gc_bif2 j W t s s d ii_gc_bif3/7 @@ -1512,7 +1512,7 @@ ii_gc_bif3/7 ii_gc_bif3 Fail Bif Live S1 S2 S3 Dst => \ move S1 x | i_gc_bif3 Fail Bif Live S2 S3 Dst -i_gc_bif3 j W I s s d +i_gc_bif3 j W t s s d # # The following instruction is specially handled in beam_load.c -- cgit v1.2.3 From 5bf73db9fd77bce195b7e78a8754730263068fb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 21 Aug 2017 18:03:04 +0200 Subject: Add the 'S' type for a register source The type 'd' could be used both for destination registers and source register. Restrict the 'd' type to only be used for destinations, and introduce the new 'S' type to be used when a source must be a register. --- erts/emulator/beam/beam_emu.c | 3 ++- erts/emulator/beam/beam_load.c | 3 ++- erts/emulator/beam/ops.tab | 6 +++--- erts/emulator/utils/beam_makeops | 5 ++++- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index badfe62aa9..482f99cb78 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -153,7 +153,7 @@ do { \ * Register target (X or Y register). */ -#define REG_TARGET_PTR(Target) (((Target) & 1) ? &yb(Target-1) : &xb(Target)) +#define REG_TARGET_PTR(Target) (((Target) & 1) ? &yb((Target)-1) : &xb(Target)) /* * Special Beam instructions. @@ -242,6 +242,7 @@ void** beam_ops; #define tb(N) (N) #define xb(N) (*(Eterm *) (((unsigned char *)reg) + (N))) #define yb(N) (*(Eterm *) (((unsigned char *)E) + (N))) +#define Sb(N) (*REG_TARGET_PTR(N)) #define lb(N) (*(double *) (((unsigned char *)&(freg[0].fd)) + (N))) #define Qb(N) (N) #define Ib(N) (N) diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 982f998ae3..9ff32e30f3 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2374,7 +2374,8 @@ load_code(LoaderState* stp) break; } break; - case 'd': /* Destination (x(0), x(N), y(N) */ + case 'd': /* Destination (x(N), y(N) */ + case 'S': /* Source (x(N), y(N)) */ switch (tag) { case TAG_x: code[ci++] = tmp_op->a[arg].val * sizeof(Eterm); diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 4f79177c89..b6e995fdbe 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -192,7 +192,7 @@ try_case_end s # Destructive set tuple element -set_tuple_element s d P +set_tuple_element s S P # Get tuple element @@ -1272,9 +1272,9 @@ fmove Arg=l Dst=d => fstore Arg Dst fmove Arg=dq Dst=l => fload Arg Dst fstore l d -fload dq l +fload Sq l -fconv d l +fconv S l i_fadd l l l i_fsub l l l diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index fcbd3de4c1..ee8b12681f 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -139,6 +139,7 @@ my %arg_size = ('r' => 0, # x(0) - x register zero 'n' => 0, # NIL (implicit) 'c' => 1, # tagged constant (integer, atom, nil) 's' => 1, # tagged source; any of the above + 'S' => 1, # tagged source register (x or y) 'd' => 1, # tagged destination register (r, x, y) 'f' => 1, # failure label 'j' => 1, # either 'f' or 'p' @@ -188,6 +189,7 @@ sub define_type_bit { define_type_bit('s', $type_bit{'d'} | $type_bit{'i'} | $type_bit{'a'} | $type_bit{'n'} | $type_bit{'q'}); + define_type_bit('S', $type_bit{'d'}); define_type_bit('j', $type_bit{'f'} | $type_bit{'p'}); # Aliases (for matching purposes). @@ -1258,7 +1260,7 @@ sub basic_generator { push(@f, "r(0)"); last SWITCH; }; - /[lxy]/ and do { + /[lxyS]/ and do { push(@f, $_ . "b(Arg($arg_offset))"); last SWITCH; }; @@ -1462,6 +1464,7 @@ sub do_pack { 'y' => 10, 'Q' => 10, 'l' => 10, + 'S' => 16, 'd' => 16, 't' => 16); if ($wordsize == 64) { -- cgit v1.2.3 From eb8ffa6a071c3fe532389ba1907fc89aa9ea15eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 23 Aug 2017 12:46:42 +0200 Subject: Eliminate the beam_instrs.h file The beam_instrs.h file serves no useful purpose. Put the instructions in beam_hot.h instead. --- erts/emulator/Makefile.in | 1 - erts/emulator/beam/beam_emu.c | 1 - erts/emulator/utils/beam_makeops | 59 ++++++++++++++-------------------------- 3 files changed, 21 insertions(+), 40 deletions(-) diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index c6511a4b49..bc7eb72221 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -546,7 +546,6 @@ endif $(TTF_DIR)/beam_cold.h \ $(TTF_DIR)/beam_hot.h \ -$(TTF_DIR)/beam_instrs.h \ $(TTF_DIR)/beam_opcodes.c \ $(TTF_DIR)/beam_opcodes.h \ $(TTF_DIR)/beam_pred_funcs.h \ diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 482f99cb78..2c37dc42b3 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -770,7 +770,6 @@ void process_main(Eterm * x_reg_array, FloatDef* f_reg_array) #endif #include "beam_hot.h" -#include "beam_instrs.h" #ifdef DEBUG /* diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index ee8b12681f..c12073d943 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -92,10 +92,9 @@ my %c_code_used; # Used or not. # Definitions for instructions combined from micro instructions. my %combined_instrs; -my %combined_code; # Combined micro instructions. -my %hot_code; -my %cold_code; +my @generated_code; # Generated code. +my %sort_order; my @unnumbered_generic; my %unnumbered; @@ -384,7 +383,7 @@ while (<>) { # micro instructions. # if (/^(\w+)\s*:=\s*([\w.]+)\s*;\s*$/) { - $combined_instrs{$1} = ["$ARGV($.)","beam_instrs.h",$2]; + $combined_instrs{$1} = ["$ARGV($.)",$2]; next; } @@ -586,11 +585,8 @@ sub emulator_output { # Save the generated $code for later. # if (defined $code) { - if ($hot) { - push(@{$hot_code{$code}}, $instr); - } else { - push(@{$cold_code{$code}}, $instr); - } + $code = "OpCase($instr):\n$code"; + push @generated_code, [$hot,$code,($instr)]; } # @@ -805,19 +801,12 @@ sub emulator_output { $name = "$outdir/beam_hot.h"; open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n"; comment('C'); - print_code(\%hot_code); + print_code(1); $name = "$outdir/beam_cold.h"; open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n"; comment('C'); - print_code(\%cold_code); - - foreach my $key (keys %combined_code) { - my $name = "$outdir/$key"; - open(STDOUT, ">$name") || die "Failed to open $name for writing: $!\n"; - comment('C'); - print_indented_code(@{$combined_code{$key}}); - } + print_code(0); } sub init_item { @@ -845,19 +834,14 @@ sub q { } sub print_code { - my($ref) = @_; - my(%sorted); - my($key, $label); # Loop variables. - - foreach $key (keys %$ref) { - my($sort_key); - my($code) = ''; - foreach $label (@{$ref->{$key}}) { - $code .= "OpCase($label):\n"; - $sort_key = $label; - } - $code .= "$key\n"; - $sorted{$sort_key} = $code; + my($include_hot) = @_; + my %sorted; + + foreach my $ref (@generated_code) { + my($hot,$code,@labels) = @$ref; + next unless $hot == $include_hot; + my($sort_key) = @labels; # Use the first label as sort key. + $sorted{$sort_key} = $code; } foreach (sort keys %sorted) { @@ -1034,12 +1018,11 @@ sub comment { # sub combine_micro_instructions { my %groups; - my %group_file; # Sanity check, normalize micro instructions. foreach my $instr (keys %combined_instrs) { my $ref = $combined_instrs{$instr}; - my($def_loc,$outfile,$def) = @$ref; + my($def_loc,$def) = @$ref; my($group,@subs) = split /[.]/, $def; my $arity = 0; @subs = map { "$group.$_" } @subs; @@ -1052,14 +1035,12 @@ sub combine_micro_instructions { $arity += scalar(@c_args); } push @{$groups{$group}}, [$instr,$arity,@subs]; - $group_file{$group} = $outfile; } # Now generate code for each group. foreach my $group (sort keys %groups) { - my $code = combine_instruction_group($group, @{$groups{$group}}); - my $outfile = $group_file{$group}; - push @{$combined_code{$outfile}}, $code; + my($code,@labels) = combine_instruction_group($group, @{$groups{$group}}); + push @generated_code, [1,$code,@labels]; } } @@ -1151,6 +1132,7 @@ sub combine_instruction_group { # Now generate the code for the entire group. my $offset = 0; + my @opcase_labels; for(my $i = 0; $i < @slots; $i++) { my $key = $slots[$i]; @@ -1174,6 +1156,7 @@ sub combine_instruction_group { if ($opcase ne '') { $gcode .= "OpCase($opcase):\n"; + push @opcase_labels, $opcase; } if ($num_references{$label}) { $gcode .= "$label:\n"; @@ -1199,7 +1182,7 @@ sub combine_instruction_group { $offset = $order_to_offset{$slots[$i+1]} if $i < $#slots; } - "{\n$gcode\n}\n\n"; + ("{\n$gcode\n}\n\n",@opcase_labels); } sub micro_label { -- cgit v1.2.3 From e337151a9bc869367032af334ebd479d92dfbdee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 23 Aug 2017 12:56:36 +0200 Subject: beam_makeops: Stop using the Arg() macro Generated code uses 'I' explicitly in other places, so it can as well use 'I' when accessing the operands for instructions. --- erts/emulator/utils/beam_makeops | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops index c12073d943..6c54ab3421 100755 --- a/erts/emulator/utils/beam_makeops +++ b/erts/emulator/utils/beam_makeops @@ -1244,7 +1244,7 @@ sub basic_generator { last SWITCH; }; /[lxyS]/ and do { - push(@f, $_ . "b(Arg($arg_offset))"); + push(@f, $_ . "b(" . arg_offset($arg_offset) . ")"); last SWITCH; }; /n/ and do { @@ -1261,13 +1261,13 @@ sub basic_generator { last SWITCH; }; /d/ and do { - $var_decls .= "Eterm dst = Arg($arg_offset);\n" . + $var_decls .= "Eterm dst = " . arg_offset($arg_offset) . ";\n" . "Eterm* dst_ptr = REG_TARGET_PTR(dst);\n"; push(@f, "*dst_ptr"); last SWITCH; }; defined $arg_size{$_} and do { - push(@f, "Arg($arg_offset)"); + push @f, arg_offset($arg_offset); last SWITCH; }; @@ -1349,6 +1349,11 @@ sub basic_generator { ($size+1, $code, $pack_spec); } +sub arg_offset { + my $offset = shift; + "I[" . ($offset+1) . "]"; +} + sub expand_all { my($code,$bindings_ref) = @_; my %bindings = %{$bindings_ref}; @@ -1595,7 +1600,8 @@ sub do_pack { $did_some_packing = 1; if ($ap == 0) { - $pack_prefix .= "Eterm $packed_var = Arg($size);\n"; + $pack_prefix .= "Eterm $packed_var = " . + arg_offset($size) . ";\n"; $up .= "p"; $down = "P$down"; $this_size = 1; -- cgit v1.2.3 From 7b64965d7a22d2250d3c6582a6d1737ca325a8dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 23 Aug 2017 07:12:52 +0200 Subject: arith_instrs.tab: Clean up bsl/bsr Eliminate the all-purpose variable 'ires'. Replace it with several variables used that are used for their specific purpose. Narrow the scope of the variables. Do this to improve readability. It is not expected that it should any impact on performance. --- erts/emulator/beam/arith_instrs.tab | 49 ++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/erts/emulator/beam/arith_instrs.tab b/erts/emulator/beam/arith_instrs.tab index 224945287b..67cd7c6a2a 100644 --- a/erts/emulator/beam/arith_instrs.tab +++ b/erts/emulator/beam/arith_instrs.tab @@ -226,9 +226,6 @@ i_bsr := shift.setup_bsr.execute; shift.head() { Eterm Op1, Op2; Sint shift_left_count; - Sint ires; - Eterm* bigp; - Eterm tmp_big[2]; } shift.setup_bsr(Src1, Src2) { @@ -272,35 +269,37 @@ shift.setup_bsl(Src1, Src2) { } shift.execute(Fail, Live, Dst) { + Uint big_words_needed; + if (is_small(Op1)) { - ires = signed_val(Op1); - if (shift_left_count == 0 || ires == 0) { + Sint int_res = signed_val(Op1); + if (shift_left_count == 0 || int_res == 0) { if (is_not_integer(Op2)) { goto shift_error; } - if (ires == 0) { + if (int_res == 0) { $Dst = Op1; $NEXT0(); } } else if (shift_left_count < 0) { /* Right shift */ + Eterm bsr_res; shift_left_count = -shift_left_count; if (shift_left_count >= SMALL_BITS-1) { - $Dst = (ires < 0) ? SMALL_MINUS_ONE : SMALL_ZERO; + bsr_res = (int_res < 0) ? SMALL_MINUS_ONE : SMALL_ZERO; } else { - $Dst = make_small(ires >> shift_left_count); + bsr_res = make_small(int_res >> shift_left_count); } + $Dst = bsr_res; $NEXT0(); } else if (shift_left_count < SMALL_BITS-1) { /* Left shift */ - if ((ires > 0 && - ((~(Uint)0 << ((SMALL_BITS-1)-shift_left_count)) & - ires) == 0) || - ((~(Uint)0 << ((SMALL_BITS-1)-shift_left_count)) & - ~ires) == 0) { - $Dst = make_small(ires << shift_left_count); + if ((int_res > 0 && + ((~(Uint)0 << ((SMALL_BITS-1)-shift_left_count)) & int_res) == 0) || + ((~(Uint)0 << ((SMALL_BITS-1)-shift_left_count)) & ~int_res) == 0) { + $Dst = make_small(int_res << shift_left_count); $NEXT0(); } } - ires = 1; /* big_size(small_to_big(Op1)) */ + big_words_needed = 1; /* big_size(small_to_big(Op1)) */ goto big_shift; } else if (is_big(Op1)) { if (shift_left_count == 0) { @@ -310,20 +309,21 @@ shift.execute(Fail, Live, Dst) { $Dst = Op1; $NEXT0(); } - ires = big_size(Op1); + big_words_needed = big_size(Op1); big_shift: if (shift_left_count > 0) { /* Left shift. */ - ires += (shift_left_count / D_EXP); + big_words_needed += (shift_left_count / D_EXP); } else { /* Right shift. */ - if (ires <= (-shift_left_count / D_EXP)) { - ires = 3; /* ??? */ + if (big_words_needed <= (-shift_left_count / D_EXP)) { + big_words_needed = 3; /* ??? */ } else { - ires -= (-shift_left_count / D_EXP); + big_words_needed -= (-shift_left_count / D_EXP); } } { - ires = BIG_NEED_SIZE(ires+1); + Eterm tmp_big[2]; + Sint big_need_size = BIG_NEED_SIZE(big_words_needed+1); /* * Slightly conservative check the size to avoid @@ -331,15 +331,14 @@ shift.execute(Fail, Live, Dst) { * clearly would overflow the arity in the header * word. */ - if (ires-8 > BIG_ARITY_MAX) { + if (big_need_size-8 > BIG_ARITY_MAX) { $SYSTEM_LIMIT($Fail); } - $GC_TEST_PRESERVE(ires+1, $Live, Op1); + $GC_TEST_PRESERVE(big_need_size+1, $Live, Op1); if (is_small(Op1)) { Op1 = small_to_big(signed_val(Op1), tmp_big); } - bigp = HTOP; - Op1 = big_lshift(Op1, shift_left_count, bigp); + Op1 = big_lshift(Op1, shift_left_count, HTOP); if (is_big(Op1)) { HTOP += bignum_header_arity(*HTOP) + 1; } -- cgit v1.2.3