From 6ce5ab74806d044070768175f48605bab7fa079d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 16 Dec 2010 10:32:28 +0100 Subject: Add erts_debug:instructions/0 for listing all specific instructions erts_debug:instructions/0 is useful for finding which specific instructions that are not used at all. --- erts/emulator/beam/beam_debug.c | 19 +++++++++++++++++++ erts/emulator/beam/bif.tab | 1 + 2 files changed, 20 insertions(+) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index b0bf14b94f..6f2a21b50a 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -157,6 +157,25 @@ void debug_dump_code(BeamInstr *I, int num) } #endif +BIF_RETTYPE +erts_debug_instructions_0(BIF_ALIST_0) +{ + int i = 0; + Uint needed = num_instructions * 2; + Eterm* hp; + Eterm res = NIL; + + for (i = 0; i < num_instructions; i++) { + needed += 2*strlen(opc[i].name); + } + hp = HAlloc(BIF_P, needed); + for (i = num_instructions-1; i >= 0; i--) { + Eterm s = erts_bld_string_n(&hp, 0, opc[i].name, strlen(opc[i].name)); + res = erts_bld_cons(&hp, 0, s, res); + } + return res; +} + Eterm erts_debug_disassemble_1(Process* p, Eterm addr) { diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 60b4b1946b..d9dd80fa8b 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -660,6 +660,7 @@ bif erts_debug:display/1 bif 'erl.system.debug':display/1 ebif_erts_debug_display_1 bif erts_debug:dist_ext_to_term/2 bif 'erl.system.debug':dist_ext_to_term/2 ebif_erts_debug_dist_ext_to_term_2 +bif erts_debug:instructions/0 # # Monitor testing bif's... -- cgit v1.2.3 From 89337287749d57024df06eaceca1236899efdcd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 11 Nov 2010 14:54:18 +0100 Subject: Remove the last vestiges of the allocating fmove/2 instruction There was a version of the BEAM loader and emulator that had two versions of the fmove/2 instruction, one version that allocated heap space internally and a newer version that assumed that a previous test_heap/2 instruction had already allocated the heap space. Though the allocating fmove/2 instruction is no longer supported, some vestiges of it still remains. --- erts/emulator/beam/beam_emu.c | 7 +------ erts/emulator/beam/beam_load.c | 9 --------- erts/emulator/beam/ops.tab | 8 ++------ 3 files changed, 3 insertions(+), 21 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 8a0e12dd4f..254eba42d0 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -4723,7 +4723,7 @@ apply_bif_or_nif_epilogue: NextPF(2, next); } - OpCase(fmove_new_ld): { + OpCase(fmove_ld): { Eterm fr = Arg(0); Eterm dest = make_float(HTOP); @@ -4753,11 +4753,6 @@ apply_bif_or_nif_epilogue: NextPF(2, next); } - /* - * Old allocating fmove. - */ - - #ifdef NO_FPE_SIGNALS OpCase(fclearerror): OpCase(i_fcheckerror): diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index df5602b040..2f335faad0 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -326,11 +326,6 @@ typedef struct { Literal* literals; /* Array of literals. */ LiteralPatch* literal_patches; /* Operands that need to be patched. */ Uint total_literal_size; /* Total heap size for all literals. */ - - /* - * Floating point. - */ - int new_float_instructions; /* New allocation scheme for floating point. */ } LoaderState; typedef struct { @@ -818,7 +813,6 @@ init_state(LoaderState* stp) stp->total_literal_size = 0; stp->literal_patches = 0; stp->string_patches = 0; - stp->new_float_instructions = 0; stp->may_load_nif = 0; stp->on_load = 0; } @@ -1618,7 +1612,6 @@ load_code(LoaderState* stp) BeamInstr val; BeamInstr words = 0; - stp->new_float_instructions = 1; GetTagAndValue(stp, tag, n); VerifyTag(stp, tag, TAG_u); while (n-- > 0) { @@ -2595,8 +2588,6 @@ binary_too_big_bits(LoaderState* stp, GenOpArg Size) return Size.type == TAG_u && (((Size.val+7)/8) >> (8*sizeof(Uint)-3) != 0); } -#define new_float_allocation(Stp) ((Stp)->new_float_instructions) - static GenOp* gen_put_binary(LoaderState* stp, GenOpArg Fail,GenOpArg Size, GenOpArg Unit, GenOpArg Flags, GenOpArg Src) diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index a2439d5582..8843dafa4b 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1307,6 +1307,8 @@ fconv Arg=iqan Dst=l => move Arg x | fconv x Dst fmove q l fmove d l +fmove l d + fconv d l i_fadd l l l @@ -1322,12 +1324,6 @@ fcheckerror p => i_fcheckerror i_fcheckerror fclearerror -fmove FR=l Dst=d | new_float_allocation() => fmove_new FR Dst - -# The new instruction for moving a float out of a floating point register. -# (No allocation.) -fmove_new l d - # # New apply instructions in R10B. # -- cgit v1.2.3 From 177cca2e0ad8752022c3322f537ee836598b342a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 10 Nov 2010 14:03:50 +0100 Subject: BEAM loader: Fix bug in handling of "rest" arguments It would only really work in simple case like: select_val S=q Fail=f Size=u Rest=* => ... where all operands for a single instruction where bound to variables, and not for more complicated cases such as: i_put_tuple Dst Arity Puts=* | put PutSrc => ... --- erts/emulator/beam/beam_load.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 2f335faad0..feac89784b 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -3992,14 +3992,17 @@ transform_engine(LoaderState* st) case TOP_rest_args: { int n = *pc++; + int formal_arity = gen_opc[instr->op].arity; + int num_vars = n + (instr->arity - formal_arity); + int j = formal_arity; + var = erts_alloc(ERTS_ALC_T_LOADER_TMP, - instr->arity * sizeof(GenOpArg)); + num_vars * sizeof(GenOpArg)); for (i = 0; i < n; i++) { var[i] = def_vars[i]; } - while (i < instr->arity) { - var[i] = instr->a[i]; - i++; + while (i < num_vars) { + var[i++] = instr->a[j++]; } } break; -- cgit v1.2.3 From d310a5cb703822e95fa4c76c5c8ec566e5027a17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 10 Dec 2010 05:39:03 +0100 Subject: beam_debug: Change one occurrence of "X[0]" to "x[0]" for consistency --- erts/emulator/beam/beam_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index 6f2a21b50a..f73d0b31a2 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -405,7 +405,7 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) break; case 'x': /* x(N) */ if (reg_index(ap[0]) == 0) { - erts_print(to, to_arg, "X[0]"); + erts_print(to, to_arg, "x[0]"); } else { erts_print(to, to_arg, "x(%d)", reg_index(ap[0])); } -- cgit v1.2.3 From 7062f8dff2ddd110ec95c1b162273ea63c37897f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 11 Nov 2010 14:19:21 +0100 Subject: beam_debug: Fix dissambly of some variable-operand instructions The i_jump_on_val_zero/3 and i_select_tuple_arity/3 instructions were not disassembled correctly. --- erts/emulator/beam/beam_debug.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index f73d0b31a2..5ada352202 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -559,6 +559,19 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) } } break; + case op_i_select_tuple_arity_sfI: + { + int n = ap[-1]; + + while (n > 0) { + Uint arity = arityval(ap[0]); + erts_print(to, to_arg, " {%d} f(" HEXF ")", arity, ap[1]); + ap += 2; + size += 2; + n--; + } + } + break; case op_i_jump_on_val_sfII: { int n; @@ -569,6 +582,16 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) } } break; + case op_i_jump_on_val_zero_sfI: + { + int n; + for (n = ap[-1]; n > 0; n--) { + erts_print(to, to_arg, "f(" HEXF ") ", ap[0]); + ap++; + size++; + } + } + break; case op_i_select_big_sf: while (ap[0]) { Eterm *bigp = (Eterm *) ap; -- cgit v1.2.3 From ec325cc785916c6a4991fe2e03747a97b7e1ae75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 29 Sep 2010 12:16:56 +0200 Subject: BEAM loader: Combine is_type/1 and is_eq/1 instructions In the transformation engine in the loader, an is_eq/1 instruction is currently always preceded by an is_type/1 instruction. Therefore, save a word and slight amount of time by combining those instructions into an is_type_eq/2 instruction. --- erts/emulator/beam/beam_load.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index feac89784b..73644ecfd6 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -3867,11 +3867,23 @@ transform_engine(LoaderState* st) if (i == 0) goto restart; break; +#if defined(TOP_is_eq) case TOP_is_eq: ASSERT(ap < instr->arity); if (*pc++ != instr->a[ap].val) goto restart; break; +#endif + case TOP_is_type_eq: + mask = *pc++; + + ASSERT(ap < instr->arity); + ASSERT(instr->a[ap].type < BEAM_NUM_TAGS); + if (((1 << instr->a[ap].type) & mask) == 0) + goto restart; + if (*pc++ != instr->a[ap].val) + goto restart; + break; case TOP_is_same_var: ASSERT(ap < instr->arity); i = *pc++; -- cgit v1.2.3 From c5df89ea0d57e41190155dcf14cbc375dc647bee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 19 Oct 2010 16:32:32 +0200 Subject: If the wordsize is 64 bits, pack up to 4 operands into a word In the 32-bit BEAM emulator, it is only possible to pack 3 register operands into one word. Therefore, the move2 instruction (that has 4 operands) needs two words for its operands. Take advantage of the larger wordsize in the 64-bit emulator and pack up to 4 operands into a single word. --- erts/emulator/beam/beam_load.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 73644ecfd6..c190efb0d1 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -1765,7 +1765,7 @@ load_code(LoaderState* stp) } stp->specific_op = specific; - CodeNeed(opc[stp->specific_op].sz+2); /* Extra margin for packing */ + CodeNeed(opc[stp->specific_op].sz+16); /* Extra margin for packing */ code[ci++] = BeamOpCode(stp->specific_op); } -- cgit v1.2.3 From 32be05dcaa4c32596bfe5372451b0b89ccd2f151 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 9 Nov 2010 15:49:06 +0100 Subject: BEAM loader: Pack more instructions using a new 'Q' type Introduce a new 'Q' type, similar to 'P' except that it can be packed. --- erts/emulator/beam/beam_debug.c | 1 + erts/emulator/beam/beam_emu.c | 1 + erts/emulator/beam/beam_load.c | 3 ++- erts/emulator/beam/ops.tab | 18 +++++++++--------- 4 files changed, 13 insertions(+), 10 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index 5ada352202..463d5f3acc 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -525,6 +525,7 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) ap++; break; case 'P': /* Byte offset into tuple (see beam_load.c) */ + case 'Q': /* Like 'P', but packable */ erts_print(to, to_arg, "%d", (*ap / sizeof(Eterm)) - 1); ap++; break; diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 254eba42d0..05afedcc7a 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -344,6 +344,7 @@ extern int count_instructions; #define xb(N) (*(Eterm *) (((unsigned char *)reg) + (N))) #define yb(N) (*(Eterm *) (((unsigned char *)E) + (N))) #define fb(N) (*(double *) (((unsigned char *)&(freg[0].fd)) + (N))) +#define Qb(N) (N) #define x(N) reg[N] #define y(N) E[N] #define r(N) x##N diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index c190efb0d1..8df053ae64 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -1929,7 +1929,8 @@ load_code(LoaderState* stp) } code[ci++] = (BeamInstr) stp->import[i].bf; break; - case 'P': /* Byte offset into tuple */ + case 'P': /* Byte offset into tuple or stack */ + case 'Q': /* Like 'P', but packable */ VerifyTag(stp, tag, TAG_u); tmp = tmp_op->a[arg].val; code[ci++] = (BeamInstr) ((tmp_op->a[arg].val+1) * sizeof(Eterm)); diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 8843dafa4b..9e55e2030e 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -464,16 +464,16 @@ move_return n r move S r | deallocate D | return => move_deallocate_return S r D -%macro: move_deallocate_return MoveDeallocateReturn -nonext -move_deallocate_return x r P -move_deallocate_return y r P -move_deallocate_return c r P -move_deallocate_return n r P +%macro: move_deallocate_return MoveDeallocateReturn -pack -nonext +move_deallocate_return x r Q +move_deallocate_return y r Q +move_deallocate_return c r Q +move_deallocate_return n r Q deallocate D | return => deallocate_return D %macro: deallocate_return DeallocateReturn -nonext -deallocate_return P +deallocate_return Q test_heap Need u==1 | put_list Y=y r r => test_heap_1_put_list Need Y @@ -940,11 +940,11 @@ move S r | call_last Ar P=f D => move_call_last S r P D i_move_call_last f P c r -%macro:move_call_last MoveCallLast -arg_f -nonext +%macro:move_call_last MoveCallLast -arg_f -nonext -pack move_call_last/4 -move_call_last x r f P -move_call_last y r f P +move_call_last x r f Q +move_call_last y r f Q move S=c r | call_only Ar P=f => i_move_call_only P S r move S=x r | call_only Ar P=f => move_call_only S r P -- cgit v1.2.3 From 3054e6b85da5822963128061aa0a0201f3f6512e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 9 Nov 2010 16:32:57 +0100 Subject: BEAM loader: Introduce a new move2_xxxx instruction --- erts/emulator/beam/ops.tab | 2 ++ 1 file changed, 2 insertions(+) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 9e55e2030e..7f4035c4e6 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -250,10 +250,12 @@ move R R => move X1=x Y1=y | move X2=x Y2=y => move2 X1 Y1 X2 Y2 move Y1=y X1=x | move Y2=y X2=x => move2 Y1 X1 Y2 X2 +move X1=x X2=x | move X3=x X4=x => move2 X1 X2 X3 X4 %macro: move2 Move2 -pack move2 x y x y move2 y x y x +move2 x x x x %macro:move Move -pack -gen_dest move x x -- cgit v1.2.3 From 21cd0c4f8654286ce8b14157529fcfc916fc209e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 11 Nov 2010 13:49:32 +0100 Subject: Eliminate the special instructions for selecting floats and bignums --- erts/emulator/beam/beam_debug.c | 32 ------- erts/emulator/beam/beam_emu.c | 103 ---------------------- erts/emulator/beam/beam_load.c | 190 ++++++++++++++++------------------------ erts/emulator/beam/ops.tab | 14 +-- 4 files changed, 82 insertions(+), 257 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index 463d5f3acc..9f8d338fbd 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -48,7 +48,6 @@ void dbg_bt(Process* p, Eterm* sp); void dbg_where(BeamInstr* addr, Eterm x0, Eterm* reg); -static void print_big(int to, void *to_arg, Eterm* addr); static int print_op(int to, void *to_arg, int op, int size, BeamInstr* addr); Eterm erts_debug_same_2(Process* p, Eterm term1, Eterm term2) @@ -593,39 +592,8 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) } } break; - case op_i_select_big_sf: - while (ap[0]) { - Eterm *bigp = (Eterm *) ap; - int arity = thing_arityval(*bigp); - print_big(to, to_arg, bigp); - size += TermWords(arity+1); - ap += TermWords(arity+1); - erts_print(to, to_arg, " f(" HEXF ") ", ap[0]); - ap++; - size++; - } - ap++; - size++; - break; } erts_print(to, to_arg, "\n"); return size; } - -static void -print_big(int to, void *to_arg, Eterm* addr) -{ - int i; - int k; - - i = BIG_SIZE(addr); - if (BIG_SIGN(addr)) - erts_print(to, to_arg, "-#integer(%d) = {", i); - else - erts_print(to, to_arg, "#integer(%d) = {", i); - erts_print(to, to_arg, "0x%x", BIG_DIGIT(addr, 0)); - for (k = 1; k < i; k++) - erts_print(to, to_arg, ",0x%x", BIG_DIGIT(addr, k)); - erts_print(to, to_arg, "}"); -} diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 05afedcc7a..266b897672 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -2846,109 +2846,6 @@ void process_main(void) Goto(*I); } - OpCase(i_select_big_sf): - { - Eterm* bigp; - Uint arity; - Eterm* given; - Uint given_arity; - Uint given_size; - - GetArg1(0, tmp_arg1); - if (is_big(tmp_arg1)) { - - /* - * The loader has sorted the bignumbers in descending order - * on the arity word. Therefore, we know that the search - * has failed as soon as we encounter an arity word less than - * the arity word of the given number. There is a zero word - * (less than any valid arity word) stored after the last bignumber. - */ - - given = big_val(tmp_arg1); - given_arity = given[0]; - given_size = thing_arityval(given_arity); - bigp = (Eterm *) &Arg(2); - while ((arity = bigp[0]) > given_arity) { - bigp += (TermWords(thing_arityval(arity) + 1) + 1) * (sizeof(BeamInstr)/sizeof(Eterm)); - } - while (bigp[0] == given_arity) { - if (memcmp(bigp+1, given+1, sizeof(Eterm)*given_size) == 0) { - BeamInstr *tmp = - ((BeamInstr *) (UWord) bigp) + TermWords(given_size + 1); - SET_I((BeamInstr *) *tmp); - Goto(*I); - } - bigp += (TermWords(thing_arityval(arity) + 1) + 1) * (sizeof(BeamInstr)/sizeof(Eterm)); - } - } - - /* - * Failed. - */ - - SET_I((BeamInstr *) Arg(1)); - Goto(*I); - } - -#if defined(ARCH_64) && !HALFWORD_HEAP - OpCase(i_select_float_sfI): - { - Uint f; - int n; - struct ValLabel { - Uint f; - BeamInstr* addr; - }; - struct ValLabel* ptr; - - GetArg1(0, tmp_arg1); - ASSERT(is_float(tmp_arg1)); - f = float_val(tmp_arg1)[1]; - n = Arg(2); - ptr = (struct ValLabel *) &Arg(3); - while (n-- > 0) { - if (ptr->f == f) { - SET_I(ptr->addr); - Goto(*I); - } - ptr++; - } - SET_I((Eterm *) Arg(1)); - Goto(*I); - } -#else - OpCase(i_select_float_sfI): - { - Uint fpart1; - Uint fpart2; - int n; - struct ValLabel { - Uint fpart1; - Uint fpart2; - BeamInstr* addr; - }; - struct ValLabel* ptr; - - GetArg1(0, tmp_arg1); - ASSERT(is_float(tmp_arg1)); - fpart1 = float_val(tmp_arg1)[1]; - fpart2 = float_val(tmp_arg1)[2]; - - n = Arg(2); - ptr = (struct ValLabel *) &Arg(3); - while (n-- > 0) { - if (ptr->fpart1 == fpart1 && ptr->fpart2 == fpart2) { - SET_I(ptr->addr); - Goto(*I); - } - ptr++; - } - SET_I((BeamInstr *) Arg(1)); - Goto(*I); - } -#endif - OpCase(set_tuple_element_sdP): { Eterm element; Eterm tuple; diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 8df053ae64..cf87e111db 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -89,13 +89,12 @@ typedef struct { } Label; /* - * Type for a operand for a generic instruction. + * Type for an operand for a generic instruction. */ typedef struct { unsigned type; /* Type of operand. */ - BeamInstr val; /* Value of operand. */ - Uint bigarity; /* Arity for bignumbers (only). */ + BeamInstr val; /* Value of operand. */ } GenOpArg; /* @@ -471,12 +470,14 @@ static int read_code_header(LoaderState* stp); static int load_code(LoaderState* stp); static GenOp* gen_element(LoaderState* stp, GenOpArg Fail, GenOpArg Index, GenOpArg Tuple, GenOpArg Dst); -static GenOp* gen_split_values(LoaderState* stp, GenOpArg S, GenOpArg Fail, +static GenOp* gen_split_values(LoaderState* stp, GenOpArg S, + GenOpArg TypeFail, GenOpArg Fail, GenOpArg Size, GenOpArg* Rest); static GenOp* gen_select_val(LoaderState* stp, GenOpArg S, GenOpArg Fail, GenOpArg Size, GenOpArg* Rest); -static GenOp* gen_select_big(LoaderState* stp, GenOpArg S, GenOpArg Fail, - GenOpArg Size, GenOpArg* Rest); +static GenOp* gen_select_literals(LoaderState* stp, GenOpArg S, + GenOpArg Fail, GenOpArg Size, + GenOpArg* Rest); static GenOp* const_select_val(LoaderState* stp, GenOpArg S, GenOpArg Fail, GenOpArg Size, GenOpArg* Rest); static GenOp* gen_func_info(LoaderState* stp, GenOpArg mod, GenOpArg Func, @@ -1972,56 +1973,6 @@ load_code(LoaderState* stp) stp->labels[tmp_op->a[arg].val].patches = ci; ci++; break; - case TAG_q: - { - Eterm lit; - - lit = stp->literals[tmp_op->a[arg].val].term; - if (is_big(lit)) { - Eterm* bigp; - Eterm *tmp; - Uint size; - Uint term_size; - - bigp = big_val(lit); - term_size = bignum_header_arity(*bigp); - size = TermWords(term_size + 1); - CodeNeed(size); - tmp = (Eterm *) (code + ci); - *tmp++ = *bigp++; - while (term_size-- > 0) { - *tmp++ = *bigp++; - } - ci +=size; - } else if (is_float(lit)) { -#if defined(ARCH_64) && !HALFWORD_HEAP - CodeNeed(1); - code[ci++] = float_val(stp->literals[tmp_op->a[arg].val].term)[1]; -#elif HALFWORD_HEAP - Eterm* fptr; - Uint size; - Eterm *tmp; - - fptr = float_val(stp->literals[tmp_op->a[arg].val].term)+1; - size = TermWords(2); - CodeNeed(size); - tmp = (Eterm *) (code + ci); - *tmp++ = *fptr++; - *tmp = *fptr; - ci += size; -#else - Eterm* fptr; - - fptr = float_val(stp->literals[tmp_op->a[arg].val].term)+1; - CodeNeed(2); - code[ci++] = *fptr++; - code[ci++] = *fptr; -#endif - } else { - LoadError0(stp, "literal is neither float nor big"); - } - } - break; default: LoadError1(stp, "unsupported primitive type '%c'", tag_to_letter[tmp_op->a[arg].type]); @@ -2233,11 +2184,12 @@ use_jump_tab(LoaderState* stp, GenOpArg Size, GenOpArg* Rest) } /* - * Predicate to test whether all values in a table are big numbers. + * Predicate to test whether all values in a table are either + * floats or bignums. */ static int -all_values_are_big(LoaderState* stp, GenOpArg Size, GenOpArg* Rest) +floats_or_bignums(LoaderState* stp, GenOpArg Size, GenOpArg* Rest) { int i; @@ -2249,9 +2201,6 @@ all_values_are_big(LoaderState* stp, GenOpArg Size, GenOpArg* Rest) if (Rest[i].type != TAG_q) { return 0; } - if (is_not_big(stp->literals[Rest[i].val].term)) { - return 0; - } if (Rest[i+1].type != TAG_f) { return 0; } @@ -3001,18 +2950,24 @@ gen_select_tuple_arity(LoaderState* stp, GenOpArg S, GenOpArg Fail, */ static GenOp* -gen_split_values(LoaderState* stp, GenOpArg S, GenOpArg Fail, - GenOpArg Size, GenOpArg* Rest) +gen_split_values(LoaderState* stp, GenOpArg S, GenOpArg TypeFail, + GenOpArg Fail, GenOpArg Size, GenOpArg* Rest) { GenOp* op1; GenOp* op2; GenOp* label; - Uint type; + GenOp* is_integer; int i; ASSERT(Size.val >= 2 && Size.val % 2 == 0); + NEW_GENOP(stp, is_integer); + is_integer->op = genop_is_integer_2; + is_integer->arity = 2; + is_integer->a[0] = TypeFail; + is_integer->a[1] = S; + NEW_GENOP(stp, label); label->op = genop_label_1; label->arity = 1; @@ -3038,15 +2993,13 @@ gen_split_values(LoaderState* stp, GenOpArg S, GenOpArg Fail, op2->a[2].type = TAG_u; op2->a[2].val = 0; - op1->next = label; - label->next = op2; - op2->next = NULL; - - type = Rest[0].type; + /* + * Split the list. + */ ASSERT(Size.type == TAG_u); for (i = 0; i < Size.val; i += 2) { - GenOp* op = (Rest[i].type == type) ? op1 : op2; + GenOp* op = (Rest[i].type == TAG_q) ? op2 : op1; int dst = 3 + op->a[2].val; ASSERT(Rest[i+1].type == TAG_f); @@ -3055,13 +3008,36 @@ gen_split_values(LoaderState* stp, GenOpArg S, GenOpArg Fail, op->arity += 2; op->a[2].val += 2; } + ASSERT(op1->a[2].val > 0); + ASSERT(op2->a[2].val > 0); /* - * None of the instructions should have zero elements in the list. + * Order the instruction sequence appropriately. */ - ASSERT(op1->a[2].val > 0); - ASSERT(op2->a[2].val > 0); + if (TypeFail.val == Fail.val) { + /* + * select_val L1 S ... (small numbers) + * label L1 + * is_integer Fail S + * select_val Fail S ... (bignums) + */ + op1->next = label; + label->next = is_integer; + is_integer->next = op2; + } else { + /* + * is_integer TypeFail S + * select_val L1 S ... (small numbers) + * label L1 + * select_val Fail S ... (bignums) + */ + is_integer->next = op1; + op1->next = label; + label->next = op2; + op1 = is_integer; + } + op2->next = NULL; return op1; } @@ -3154,8 +3130,9 @@ genopargcompare(GenOpArg* a, GenOpArg* b) } /* - * Generate a select_val instruction. We know that a jump table is not suitable, - * and that all values are of the same type (integer, atoms, floats; never bignums). + * Generate a select_val instruction. We know that a jump table + * is not suitable, and that all values are of the same type + * (integer or atoms). */ static GenOp* @@ -3169,12 +3146,7 @@ gen_select_val(LoaderState* stp, GenOpArg S, GenOpArg Fail, NEW_GENOP(stp, op); op->next = NULL; - if (Rest[0].type != TAG_q) { - op->op = genop_i_select_val_3; - } else { - ASSERT(is_float(stp->literals[Rest[0].val].term)); - op->op = genop_i_select_float_3; - } + op->op = genop_i_select_val_3; GENOP_ARITY(op, arity); op->a[0] = S; op->a[1] = Fail; @@ -3199,54 +3171,40 @@ gen_select_val(LoaderState* stp, GenOpArg S, GenOpArg Fail, return op; } -/* - * Compare function for qsort(). - */ - -static int -genbigcompare(GenOpArg* a, GenOpArg* b) -{ - int val = (int)(b->bigarity - a->bigarity); - - return val != 0 ? val : ((int) (a->val - b->val)); -} - /* * Generate a select_val instruction for big numbers. */ static GenOp* -gen_select_big(LoaderState* stp, GenOpArg S, GenOpArg Fail, +gen_select_literals(LoaderState* stp, GenOpArg S, GenOpArg Fail, GenOpArg Size, GenOpArg* Rest) { GenOp* op; - int arity = Size.val + 2 + 1; - int size = Size.val / 2; + GenOp* jump; + GenOp** prev_next = &op; + int i; - NEW_GENOP(stp, op); - op->next = NULL; - op->op = genop_i_select_big_2; - GENOP_ARITY(op, arity); - op->a[0] = S; - op->a[1] = Fail; for (i = 0; i < Size.val; i += 2) { + GenOp* op; ASSERT(Rest[i].type == TAG_q); - op->a[i+2] = Rest[i]; - op->a[i+2].bigarity = *big_val(stp->literals[op->a[i+2].val].term); - op->a[i+3] = Rest[i+1]; - } - ASSERT(i+2 == arity-1); - op->a[arity-1].type = TAG_u; - op->a[arity-1].val = 0; - - /* - * Sort the values in descending arity order. - */ - - qsort(op->a+2, size, 2*sizeof(GenOpArg), - (int (*)(const void *, const void *)) genbigcompare); + NEW_GENOP(stp, op); + op->op = genop_is_ne_exact_3; + op->arity = 3; + op->a[0] = Rest[i+1]; + op->a[1] = S; + op->a[2] = Rest[i]; + *prev_next = op; + prev_next = &op->next; + } + + NEW_GENOP(stp, jump); + jump->next = NULL; + jump->op = genop_jump_1; + jump->arity = 1; + jump->a[0] = Fail; + *prev_next = jump; return op; } diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 7f4035c4e6..2d89926bc5 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -132,8 +132,12 @@ select_val S=s Fail=f Size=u Rest=* | use_jump_tab(Size, Rest) => \ is_integer Fail=f S | select_val S=s Fail=f Size=u Rest=* | use_jump_tab(Size, Rest) => \ gen_jump_tab(S, Fail, Size, Rest) +is_integer TypeFail=f S | select_val S=s Fail=f Size=u Rest=* | \ + mixed_types(Size, Rest) => \ + gen_split_values(S, TypeFail, Fail, Size, Rest) + select_val S=s Fail=f Size=u Rest=* | mixed_types(Size, Rest) => \ - gen_split_values(S, Fail, Size, Rest) + gen_split_values(S, Fail, Fail, Size, Rest) is_integer Fail=f S | select_val S=s Fail=f Size=u Rest=* | \ fixed_size_values(Size, Rest) => gen_select_val(S, Fail, Size, Rest) @@ -141,12 +145,12 @@ is_integer Fail=f S | select_val S=s Fail=f Size=u Rest=* | \ is_atom Fail=f S | select_val S=s Fail=f Size=u Rest=* | \ fixed_size_values(Size, Rest) => gen_select_val(S, Fail, Size, Rest) +select_val S=s Fail=f Size=u Rest=* | floats_or_bignums(Size, Rest) => \ + gen_select_literals(S, Fail, Size, Rest) + select_val S=s Fail=f Size=u Rest=* | fixed_size_values(Size, Rest) => \ gen_select_val(S, Fail, Size, Rest) -select_val S=s Fail=f Size=u Rest=* | all_values_are_big(Size, Rest) => \ - gen_select_big(S, Fail, Size, Rest) - is_tuple Fail=f S | select_tuple_arity S=s Fail=f Size=u Rest=* => \ gen_select_tuple_arity(S, Fail, Size, Rest) @@ -155,8 +159,6 @@ select_tuple_arity S=s Fail=f Size=u Rest=* => \ i_select_val s f I i_select_tuple_arity s f I -i_select_big s f -i_select_float s f I i_jump_on_val_zero s f I i_jump_on_val s f I I -- cgit v1.2.3 From d734f64cc5b4e8b906f0d4f1b28f67c492a05bae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 11 Nov 2010 12:18:50 +0100 Subject: Eliminate redundant jump instructions --- erts/emulator/beam/beam_load.c | 8 ++++++++ erts/emulator/beam/ops.tab | 5 +++++ 2 files changed, 13 insertions(+) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index cf87e111db..65cd6f91bb 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2260,6 +2260,14 @@ mixed_types(LoaderState* stp, GenOpArg Size, GenOpArg* Rest) return 0; } +static int +same_label(LoaderState* stp, GenOpArg Target, GenOpArg Label) +{ + return Target.type = TAG_f && Label.type == TAG_u && + Target.val == Label.val; +} + + /* * Generate an instruction for element/2. */ diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 2d89926bc5..81be97fd1e 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -163,6 +163,11 @@ i_select_tuple_arity s f I i_jump_on_val_zero s f I i_jump_on_val s f I I +jump Target | label Lbl | same_label(Target, Lbl) => label Lbl + +is_ne_exact L1 S1 S2 | jump Fail | label L2 | same_label(L1, L2) => \ + is_eq_exact Fail S1 S2 | label L2 + %macro: get_list GetList -pack get_list x x x get_list x x y -- cgit v1.2.3 From 56da36ef79a1c7ffb39836e1838084ed53dda6f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 14 Dec 2010 07:23:19 +0100 Subject: Support packing of the 'I' type in a 64-bit emulator In many (not all) cases, the value for the 'I' type will fit into 32 bits. --- erts/emulator/beam/beam_debug.c | 6 ++++++ erts/emulator/beam/beam_emu.c | 1 + erts/emulator/beam/beam_load.c | 5 +++++ 3 files changed, 12 insertions(+) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index 9f8d338fbd..4fa9d60bca 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -378,6 +378,12 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) *ap++ = packed & BEAM_LOOSE_MASK; packed >>= BEAM_LOOSE_SHIFT; break; +#ifdef ARCH_64 + case 'w': /* Shift 32 steps */ + *ap++ = packed & BEAM_WIDE_MASK; + packed >>= BEAM_WIDE_SHIFT; + break; +#endif case 'p': *sp++ = *--ap; break; diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 266b897672..bce51a001f 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -345,6 +345,7 @@ extern int count_instructions; #define yb(N) (*(Eterm *) (((unsigned char *)E) + (N))) #define fb(N) (*(double *) (((unsigned char *)&(freg[0].fd)) + (N))) #define Qb(N) (N) +#define Ib(N) (N) #define x(N) reg[N] #define y(N) E[N] #define r(N) x##N diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 65cd6f91bb..8c380536ae 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2002,6 +2002,11 @@ load_code(LoaderState* stp) case '6': /* Shift 16 steps */ packed = (packed << BEAM_LOOSE_SHIFT) | code[--ci]; break; +#ifdef ARCH_64 + case 'w': /* Shift 32 steps */ + packed = (packed << BEAM_WIDE_SHIFT) | code[--ci]; + break; +#endif case 'p': /* Put instruction (from stack). */ code[ci++] = *--sp; break; -- cgit v1.2.3 From 33aca759679ed65ad5addebbdc8f2ad8929125a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 14 Dec 2010 07:49:13 +0100 Subject: Allow packing of some more instructions --- erts/emulator/beam/beam_emu.c | 17 +++++++---------- erts/emulator/beam/ops.tab | 17 +++++++++-------- 2 files changed, 16 insertions(+), 18 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index bce51a001f..82925fda2c 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -474,6 +474,13 @@ extern int count_instructions; HEAP_SPACE_VERIFIED(need); \ } while (0) +#define TestHeapPutList(Need, Reg) \ + do { \ + TestHeap((Need), 1); \ + PutList(Reg, r(0), r(0), StoreSimpleDest); \ + CHECK_TERM(r(0)); \ + } while (0) + #ifdef HYBRID #ifdef INCREMENTAL #define TestGlobalHeap(Nh, Live, hp) \ @@ -1407,16 +1414,6 @@ void process_main(void) Goto(*I); } - OpCase(test_heap_1_put_list_Iy): { - BeamInstr *next; - - PreFetch(2, next); - TestHeap(Arg(0), 1); - PutList(yb(Arg(1)), r(0), r(0), StoreSimpleDest); - CHECK_TERM(r(0)); - NextPF(2, next); - } - /* * Send is almost a standard call-BIF with two arguments, except for: * 1) It cannot be traced. diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 81be97fd1e..4546f056a9 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -101,16 +101,16 @@ return %macro: test_heap TestHeap -pack allocate t t -allocate_heap I I I +allocate_heap t I t deallocate I init y allocate_zero t t -allocate_heap_zero I I I +allocate_heap_zero t I t trim N Remaining => i_trim N i_trim I -test_heap I I +test_heap I t allocate_heap S u==0 R => allocate S R allocate_heap_zero S u==0 R => allocate_zero S R @@ -486,6 +486,7 @@ deallocate_return Q test_heap Need u==1 | put_list Y=y r r => test_heap_1_put_list Need Y +%macro: test_heap_1_put_list TestHeapPutList -pack test_heap_1_put_list I y # Test tuple & arity (head) @@ -585,14 +586,14 @@ is_list f y is_nonempty_list Fail=f S=rx | allocate Need Rs => is_nonempty_list_allocate Fail S Need Rs -%macro:is_nonempty_list_allocate IsNonemptyListAllocate -fail_action -is_nonempty_list_allocate f x I I -is_nonempty_list_allocate f r I I +%macro:is_nonempty_list_allocate IsNonemptyListAllocate -fail_action -pack +is_nonempty_list_allocate f x I t +is_nonempty_list_allocate f r I t is_nonempty_list F=f r | test_heap I1 I2 => is_non_empty_list_test_heap F r I1 I2 -%macro: is_non_empty_list_test_heap IsNonemptyListTestHeap -fail_action -is_non_empty_list_test_heap f r I I +%macro: is_non_empty_list_test_heap IsNonemptyListTestHeap -fail_action -pack +is_non_empty_list_test_heap f r I t %macro: is_nonempty_list IsNonemptyList -fail_action is_nonempty_list f x -- cgit v1.2.3 From 9cea01fb88b0e18e387b08fa3e6273dbf1f76082 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 17 Dec 2010 10:58:54 +0100 Subject: beam_load: Run the packing engine before loading list arguments --- erts/emulator/beam/beam_load.c | 56 +++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 8c380536ae..bc6186751e 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -1951,34 +1951,6 @@ load_code(LoaderState* stp) arg++; } - /* - * Load any list arguments using the primitive tags. - */ - - for ( ; arg < tmp_op->arity; arg++) { - switch (tmp_op->a[arg].type) { - case TAG_i: - CodeNeed(1); - code[ci++] = make_small(tmp_op->a[arg].val); - break; - case TAG_u: - case TAG_a: - case TAG_v: - CodeNeed(1); - code[ci++] = tmp_op->a[arg].val; - break; - case TAG_f: - CodeNeed(1); - code[ci] = stp->labels[tmp_op->a[arg].val].patches; - stp->labels[tmp_op->a[arg].val].patches = ci; - ci++; - break; - default: - LoadError1(stp, "unsupported primitive type '%c'", - tag_to_letter[tmp_op->a[arg].type]); - } - } - /* * The packing engine. */ @@ -2021,6 +1993,34 @@ load_code(LoaderState* stp) ASSERT(sp == stack); /* Incorrect program? */ } + /* + * Load any list arguments using the primitive tags. + */ + + for ( ; arg < tmp_op->arity; arg++) { + switch (tmp_op->a[arg].type) { + case TAG_i: + CodeNeed(1); + code[ci++] = make_small(tmp_op->a[arg].val); + break; + case TAG_u: + case TAG_a: + case TAG_v: + CodeNeed(1); + code[ci++] = tmp_op->a[arg].val; + break; + case TAG_f: + CodeNeed(1); + code[ci] = stp->labels[tmp_op->a[arg].val].patches; + stp->labels[tmp_op->a[arg].val].patches = ci; + ci++; + break; + default: + LoadError1(stp, "unsupported primitive type '%c'", + tag_to_letter[tmp_op->a[arg].type]); + } + } + /* * Handle a few special cases. */ -- cgit v1.2.3 From 824a481d5f38c748cd6efcb83cba0b8d26b88768 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 11 Nov 2010 08:23:55 +0100 Subject: Optimize creation of tuples Combine the put_tuple/2 and all following put/1 instructions to one i_put_tuple/2 instruction. In general, that will reduce the number of instruction words by 50 percent. Measurements seem to indicate that the speed is about the same. --- erts/emulator/beam/beam_debug.c | 28 ++++++++++++++++ erts/emulator/beam/beam_emu.c | 42 ++++++++++++++++++----- erts/emulator/beam/beam_load.c | 74 +++++++++++++++++++++++++++++++++++++++++ erts/emulator/beam/ops.tab | 39 ++++++++-------------- 4 files changed, 149 insertions(+), 34 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index 4fa9d60bca..6c16c24d0d 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -330,6 +330,7 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) BeamInstr packed = 0; /* Accumulator for packed operations. */ BeamInstr args[8]; /* Arguments for this instruction. */ BeamInstr* ap; /* Pointer to arguments. */ + BeamInstr* unpacked; /* Unpacked arguments */ start_prog = opc[op].pack; @@ -551,6 +552,7 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) * Print more information about certain instructions. */ + unpacked = ap; ap = addr + size; switch (op) { case op_i_select_val_sfI: @@ -598,6 +600,32 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) } } break; + case op_i_put_tuple_rI: + case op_i_put_tuple_xI: + case op_i_put_tuple_yI: + { + int n = unpacked[-1]; + + while (n > 0) { + if (!is_header(ap[0])) { + erts_print(to, to_arg, " %T", (Eterm) ap[0]); + } else { + switch ((ap[0] >> 2) & 0x03) { + case R_REG_DEF: + erts_print(to, to_arg, " x(0)"); + break; + case X_REG_DEF: + erts_print(to, to_arg, " x(%d)", ap[0] >> 4); + break; + case Y_REG_DEF: + erts_print(to, to_arg, " y(%d)", ap[0] >> 4); + break; + } + } + ap++, size++, n--; + } + } + break; } erts_print(to, to_arg, "\n"); diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 82925fda2c..e36ac1f1e6 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -525,6 +525,11 @@ extern int count_instructions; SWAPIN; \ } while (0) +#define PutTuple(Dst, Arity) \ + do { \ + Dst = make_tuple(HTOP); \ + pt_arity = (Arity); \ + } while (0) /* * Check that we haven't used the reductions and jump to function pointed to by @@ -732,15 +737,6 @@ extern int count_instructions; (Dest) = (* (Eterm *) EXPAND_POINTER(tmp_arg1)); \ } while (0) -#define PutTuple(Arity, Src, Dest) \ - ASSERT(is_arity_value(Arity)); \ - Dest = make_tuple(HTOP); \ - HTOP[0] = (Arity); \ - HTOP[1] = (Src); \ - HTOP += 2 - -#define Put(Word) *HTOP++ = (Word) - #define EqualImmed(X, Y, Action) if (X != Y) { Action; } #define IsFloat(Src, Fail) if (is_not_float(Src)) { Fail; } @@ -1155,6 +1151,8 @@ void process_main(void) Uint temp_bits; /* Temporary used by BsSkipBits2 & BsGetInteger2 */ + Eterm pt_arity; /* Used by do_put_tuple */ + ERL_BITS_DECLARE_STATEP; /* Has to be last declaration */ @@ -1924,6 +1922,32 @@ void process_main(void) Goto(*I); } + do_put_tuple: { + Eterm* hp = HTOP; + + *hp++ = make_arityval(pt_arity); + + do { + Eterm term = *I++; + switch (term & _TAG_IMMED1_MASK) { + case (R_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER: + *hp++ = r(0); + break; + case (X_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER: + *hp++ = x(term >> _TAG_IMMED1_SIZE); + break; + case (Y_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER: + *hp++ = y(term >> _TAG_IMMED1_SIZE); + break; + default: + *hp++ = term; + break; + } + } while (--pt_arity != 0); + HTOP = hp; + Goto(*I); + } + /* * All guards with zero arguments have special instructions: * self/0 diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index bc6186751e..a476e439ca 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2015,6 +2015,30 @@ load_code(LoaderState* stp) stp->labels[tmp_op->a[arg].val].patches = ci; ci++; break; + case TAG_r: + CodeNeed(1); + code[ci++] = (R_REG_DEF << _TAG_PRIMARY_SIZE) | + TAG_PRIMARY_HEADER; + break; + case TAG_x: + CodeNeed(1); + code[ci++] = (tmp_op->a[arg].val << _TAG_IMMED1_SIZE) | + (X_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER; + break; + case TAG_y: + CodeNeed(1); + code[ci++] = (tmp_op->a[arg].val << _TAG_IMMED1_SIZE) | + (Y_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER; + break; + case TAG_n: + CodeNeed(1); + code[ci++] = NIL; + break; + case TAG_q: + CodeNeed(1); + new_literal_patch(stp, ci); + code[ci++] = tmp_op->a[arg].val; + break; default: LoadError1(stp, "unsupported primitive type '%c'", tag_to_letter[tmp_op->a[arg].type]); @@ -3440,6 +3464,56 @@ gen_guard_bif3(LoaderState* stp, GenOpArg Fail, GenOpArg Live, GenOpArg Bif, return op; } +static GenOp* +tuple_append_put5(LoaderState* stp, GenOpArg Arity, GenOpArg Dst, + GenOpArg* Puts, GenOpArg S1, GenOpArg S2, GenOpArg S3, + GenOpArg S4, GenOpArg S5) +{ + GenOp* op; + int arity = Arity.val; /* Arity of tuple, not the instruction */ + int i; + + NEW_GENOP(stp, op); + op->next = NULL; + GENOP_ARITY(op, arity+2+5); + op->op = genop_i_put_tuple_2; + op->a[0] = Dst; + op->a[1].type = TAG_u; + op->a[1].val = arity + 5; + for (i = 0; i < arity; i++) { + op->a[i+2] = Puts[i]; + } + op->a[arity+2] = S1; + op->a[arity+3] = S2; + op->a[arity+4] = S3; + op->a[arity+5] = S4; + op->a[arity+6] = S5; + return op; +} + +static GenOp* +tuple_append_put(LoaderState* stp, GenOpArg Arity, GenOpArg Dst, + GenOpArg* Puts, GenOpArg S) +{ + GenOp* op; + int arity = Arity.val; /* Arity of tuple, not the instruction */ + int i; + + NEW_GENOP(stp, op); + op->next = NULL; + GENOP_ARITY(op, arity+2+1); + op->op = genop_i_put_tuple_2; + op->a[0] = Dst; + op->a[1].type = TAG_u; + op->a[1].val = arity + 1; + for (i = 0; i < arity; i++) { + op->a[i+2] = Puts[i]; + } + op->a[arity+2] = S; + return op; +} + + /* * Freeze the code in memory, move the string table into place, diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 4546f056a9..f629cc80b6 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -345,25 +345,21 @@ i_is_eq_immed f y c # Putting things. # -put_tuple Arity Dst | put V => i_put_tuple Arity V Dst - -%macro: i_put_tuple PutTuple -pack -i_put_tuple A x x -i_put_tuple A y x -i_put_tuple A r x -i_put_tuple A n x -i_put_tuple A c x -i_put_tuple A x y -i_put_tuple A x r -i_put_tuple A y r -i_put_tuple A n r -i_put_tuple A c r +put_tuple Arity Dst => i_put_tuple Dst u -%cold -i_put_tuple A r y -i_put_tuple A y y -i_put_tuple A c y -%hot +i_put_tuple Dst Arity Puts=* | put S1 | put S2 | \ + put S3 | put S4 | put S5 => \ + tuple_append_put5(Arity, Dst, Puts, S1, S2, S3, S4, S5) + +i_put_tuple Dst Arity Puts=* | put S => \ + tuple_append_put(Arity, Dst, Puts, S) + +i_put_tuple/2 + +%macro:i_put_tuple PutTuple -pack -goto:do_put_tuple +i_put_tuple r I +i_put_tuple x I +i_put_tuple y I %macro:put_list PutList -pack -gen_dest @@ -416,13 +412,6 @@ put_list x r r put_list s s d %hot -%macro: put Put -put x -put r -put y -put c -put n - %macro: i_fetch FetchArgs -pack i_fetch c c i_fetch c r -- cgit v1.2.3 From 4d05097ec97cc18c795ffee83d1d08d396e16814 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 11 Nov 2010 19:01:22 +0100 Subject: Simplify a select_val instruction that selects only one value The compiler does not generate select_val instructions that only selects one value, but the loader may previously have created such an instruction when it splitted a select_val instruction that selected on bignums. --- erts/emulator/beam/beam_load.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index a476e439ca..73f057929e 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -3095,6 +3095,29 @@ gen_jump_tab(LoaderState* stp, GenOpArg S, GenOpArg Fail, GenOpArg Size, GenOpAr ASSERT(Size.val >= 2 && Size.val % 2 == 0); + /* + * If there is only one choice, don't generate a jump table. + */ + if (Size.val == 2) { + GenOp* jump; + + NEW_GENOP(stp, op); + op->arity = 3; + op->op = genop_is_ne_exact_3; + op->a[0] = Rest[1]; + op->a[1] = S; + op->a[2] = Rest[0]; + + NEW_GENOP(stp, jump); + jump->next = NULL; + jump->arity = 1; + jump->op = genop_jump_1; + jump->a[0] = Fail; + + op->next = jump; + return op; + } + /* * Calculate the minimum and maximum values and size of jump table. */ -- cgit v1.2.3 From a3981bb0d5742098e484e49661b7f378f18069d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 8 Dec 2010 06:55:37 +0100 Subject: beam_emu: Clean up calling of the error_handler module There were two separate functions (call_error_handler() and call_breakpoint_handler()) that were identical except for the name of the function in the error_handler module being called. Generalize call_error_handler() by adding a function name argument so that it can be used for both purposes. Also let the call_error_handler() return the new program counter instead of passing it in c_p->i. That slightly decrease the code size at the call site. There is also no need to use the Dispatch() macro to yet again decrease the reduction counter, because that has just been done by the call instruction that caused the execution of the call_error_handler or i_debug_breakpoint instruction. --- erts/emulator/beam/beam_emu.c | 79 +++++++------------------------------------ 1 file changed, 13 insertions(+), 66 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index e36ac1f1e6..2f90f67351 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -998,8 +998,8 @@ static void save_stacktrace(Process* c_p, BeamInstr* pc, Eterm* reg, BifFunction bf, Eterm args); static struct StackTrace * get_trace_from_exc(Eterm exc); static Eterm make_arglist(Process* c_p, Eterm* reg, int a); -static Eterm call_error_handler(Process* p, BeamInstr* ip, Eterm* reg); -static Eterm call_breakpoint_handler(Process* p, BeamInstr* fi, Eterm* reg); +static BeamInstr* call_error_handler(Process* p, BeamInstr* ip, + Eterm* reg, Eterm func); static BeamInstr* fixed_apply(Process* p, Eterm* reg, Uint arity); static BeamInstr* apply(Process* p, Eterm module, Eterm function, Eterm args, Eterm* reg); @@ -2976,12 +2976,11 @@ void process_main(void) */ SWAPOUT; reg[0] = r(0); - tmp_arg1 = call_error_handler(c_p, I-3, reg); + I = call_error_handler(c_p, I-3, reg, am_undefined_function); r(0) = reg[0]; SWAPIN; - if (tmp_arg1) { - SET_I(c_p->i); - Dispatch(); + if (I) { + Goto(*I); } /* Fall through */ @@ -4884,12 +4883,11 @@ apply_bif_or_nif_epilogue: OpCase(i_debug_breakpoint): { SWAPOUT; reg[0] = r(0); - tmp_arg1 = call_breakpoint_handler(c_p, I-3, reg); + I = call_error_handler(c_p, I-3, reg, am_breakpoint); r(0) = reg[0]; SWAPIN; - if (tmp_arg1) { - SET_I(c_p->i); - Dispatch(); + if (I) { + Goto(*I); } goto no_error_handler; } @@ -5639,8 +5637,8 @@ build_stacktrace(Process* c_p, Eterm exc) { } -static Eterm -call_error_handler(Process* p, BeamInstr* fi, Eterm* reg) +static BeamInstr* +call_error_handler(Process* p, BeamInstr* fi, Eterm* reg, Eterm func) { Eterm* hp; Export* ep; @@ -5652,62 +5650,12 @@ call_error_handler(Process* p, BeamInstr* fi, Eterm* reg) /* * Search for the error_handler module. */ - ep = erts_find_function(erts_proc_get_error_handler(p), - am_undefined_function, 3); - if (ep == NULL) { /* No error handler */ - p->current = fi; - p->freason = EXC_UNDEF; - return 0; - } - p->i = ep->address; - - /* - * Create a list with all arguments in the x registers. - */ - - arity = fi[2]; - sz = 2 * arity; - if (HeapWordsLeft(p) < sz) { - erts_garbage_collect(p, sz, reg, arity); - } - hp = HEAP_TOP(p); - HEAP_TOP(p) += sz; - args = NIL; - for (i = arity-1; i >= 0; i--) { - args = CONS(hp, reg[i], args); - hp += 2; - } - - /* - * Set up registers for call to error_handler:undefined_function/3. - */ - reg[0] = fi[0]; - reg[1] = fi[1]; - reg[2] = args; - return 1; -} - -static Eterm -call_breakpoint_handler(Process* p, BeamInstr* fi, Eterm* reg) -{ - Eterm* hp; - Export* ep; - int arity; - Eterm args; - Uint sz; - int i; - - /* - * Search for error handler module. - */ - ep = erts_find_function(erts_proc_get_error_handler(p), - am_breakpoint, 3); + ep = erts_find_function(erts_proc_get_error_handler(p), func, 3); if (ep == NULL) { /* No error handler */ p->current = fi; p->freason = EXC_UNDEF; return 0; } - p->i = ep->address; /* * Create a list with all arguments in the x registers. @@ -5727,15 +5675,14 @@ call_breakpoint_handler(Process* p, BeamInstr* fi, Eterm* reg) } /* - * Set up registers for call to error_handler:breakpoint/3. + * Set up registers for call to error_handler:/3. */ reg[0] = fi[0]; reg[1] = fi[1]; reg[2] = args; - return 1; + return ep->address; } - static Export* apply_setup_error_handler(Process* p, Eterm module, Eterm function, Uint arity, Eterm* reg) -- cgit v1.2.3 From 0c44ee46cd9f108599aad58c7f0f62b34dad0615 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 8 Dec 2010 09:04:34 +0100 Subject: beam_emu: Don't inline helper functions into process_main() By default, GCC will inline calls to helper functions. Since process_main() is already huge, there is no reason to inline the helper functions (and some of them are used very seldom). --- erts/emulator/beam/beam_emu.c | 47 ++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 12 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 2f90f67351..82999ec79e 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -989,8 +989,41 @@ extern int count_instructions; #define IsPid(Src, Fail) if (is_not_pid(Src)) { Fail; } #define IsRef(Src, Fail) if (is_not_ref(Src)) { Fail; } -static BifFunction translate_gc_bif(void* gcf); -static BeamInstr* handle_error(Process* c_p, BeamInstr* pc, Eterm* reg, BifFunction bf); +/* + * process_main() is already huge, so we want to avoid inlining + * into it. Especially functions that are seldom used. + */ +#ifdef __GNUC__ +# define NOINLINE __attribute__((__noinline__)) +#else +# define NOINLINE +#endif + +/* + * The following functions are called directly by process_main(). + * Don't inline them. + */ +static BifFunction translate_gc_bif(void* gcf) NOINLINE; +static BeamInstr* handle_error(Process* c_p, BeamInstr* pc, + Eterm* reg, BifFunction bf) NOINLINE; +static BeamInstr* call_error_handler(Process* p, BeamInstr* ip, + Eterm* reg, Eterm func) NOINLINE; +static BeamInstr* fixed_apply(Process* p, Eterm* reg, Uint arity) NOINLINE; +static BeamInstr* apply(Process* p, Eterm module, Eterm function, + Eterm args, Eterm* reg) NOINLINE; +static int hibernate(Process* c_p, Eterm module, Eterm function, + Eterm args, Eterm* reg) NOINLINE; +static BeamInstr* call_fun(Process* p, int arity, + Eterm* reg, Eterm args) NOINLINE; +static BeamInstr* apply_fun(Process* p, Eterm fun, + Eterm args, Eterm* reg) NOINLINE; +static Eterm new_fun(Process* p, Eterm* reg, + ErlFunEntry* fe, int num_free) NOINLINE; + + +/* + * Functions not directly called by process_main(). OK to inline. + */ static BeamInstr* next_catch(Process* c_p, Eterm *reg); static void terminate_proc(Process* c_p, Eterm Value); static Eterm add_stacktrace(Process* c_p, Eterm Value, Eterm exc); @@ -998,16 +1031,6 @@ static void save_stacktrace(Process* c_p, BeamInstr* pc, Eterm* reg, BifFunction bf, Eterm args); static struct StackTrace * get_trace_from_exc(Eterm exc); static Eterm make_arglist(Process* c_p, Eterm* reg, int a); -static BeamInstr* call_error_handler(Process* p, BeamInstr* ip, - Eterm* reg, Eterm func); -static BeamInstr* fixed_apply(Process* p, Eterm* reg, Uint arity); -static BeamInstr* apply(Process* p, Eterm module, Eterm function, - Eterm args, Eterm* reg); -static int hibernate(Process* c_p, Eterm module, Eterm function, - Eterm args, Eterm* reg); -static BeamInstr* call_fun(Process* p, int arity, Eterm* reg, Eterm args); -static BeamInstr* apply_fun(Process* p, Eterm fun, Eterm args, Eterm* reg); -static Eterm new_fun(Process* p, Eterm* reg, ErlFunEntry* fe, int num_free); #if defined(VXWORKS) static int init_done; -- cgit v1.2.3 From a24830ce9db4325b51af56262094bb1d32ef4e2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 19 Nov 2010 10:47:05 +0100 Subject: beam_emu: Eliminate sloppy use of tmp_arg1 and tmp_arg2 The tmp_arg1 and tmp_arg2 variables are intended for transferring values from the fetch/2 instructions to instructions such as i_plus/3. In many places, however, tmp_arg1 and tmp_arg2 are used as general temporary variables within a single instruction. Improve the code generation by replacing sloppy use of tmp_arg1 and tmp_arg2 with block-local variables. In most cases, that will allow the temporary values to be kept in registers. --- erts/emulator/beam/beam_emu.c | 603 +++++++++++++++++++++++------------------- 1 file changed, 330 insertions(+), 273 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 82999ec79e..09f6fc8cef 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1503,9 +1503,9 @@ void process_main(void) GetArg1(2, tuple); if (is_tuple(tuple)) { Eterm* tp = tuple_val(tuple); - tmp_arg2 = Arg(1); - if (tmp_arg2 <= arityval(*tp)) { - Eterm result = tp[tmp_arg2]; + Eterm pos = Arg(1); + if (pos <= arityval(*tp)) { + Eterm result = tp[pos]; StoreBifResult(3, result); } } @@ -1862,8 +1862,22 @@ void process_main(void) NextPF(0, next); } + { + Eterm select_val; + + OpCase(i_select_tuple_arity_sfI): + GetArg1(0, select_val); + + if (is_tuple(select_val)) { + select_val = *tuple_val(select_val); + goto do_binary_search; + } + SET_I((BeamInstr *) Arg(1)); + Goto(*I); + + OpCase(i_select_val_sfI): - GetArg1(0, tmp_arg1); + GetArg1(0, select_val); do_binary_search: { @@ -1900,9 +1914,9 @@ void process_main(void) unsigned int boffset = ((unsigned int)bdiff >> 1) & ~(sizeof(struct Pairs)-1); mid = (struct Pairs*)((char*)low + boffset); - if (tmp_arg1 < mid->val) { + if (select_val < mid->val) { high = mid; - } else if (tmp_arg1 > mid->val) { + } else if (select_val > mid->val) { low = mid + 1; } else { SET_I(mid->addr); @@ -1912,6 +1926,7 @@ void process_main(void) SET_I((BeamInstr *) Arg(1)); Goto(*I); } + } OpCase(i_jump_on_val_zero_sfI): { @@ -2608,23 +2623,25 @@ void process_main(void) OpCase(i_int_bnot_jsId): { - GetArg1(1, tmp_arg1); - if (is_small(tmp_arg1)) { - tmp_arg1 = make_small(~signed_val(tmp_arg1)); + Eterm bnot_val; + + GetArg1(1, bnot_val); + if (is_small(bnot_val)) { + bnot_val = make_small(~signed_val(bnot_val)); } else { Uint live = Arg(2); SWAPOUT; reg[0] = r(0); - reg[live] = tmp_arg1; - tmp_arg1 = erts_gc_bnot(c_p, reg, live); + reg[live] = bnot_val; + bnot_val = erts_gc_bnot(c_p, reg, live); r(0) = reg[0]; SWAPIN; ERTS_HOLE_CHECK(c_p); - if (is_nil(tmp_arg1)) { + if (is_nil(bnot_val)) { goto lb_Cl_error; } } - StoreBifResult(3, tmp_arg1); + StoreBifResult(3, bnot_val); } badarith: @@ -2879,18 +2896,6 @@ void process_main(void) goto do_schedule1; } - OpCase(i_select_tuple_arity_sfI): - { - GetArg1(0, tmp_arg1); - - if (is_tuple(tmp_arg1)) { - tmp_arg1 = *tuple_val(tmp_arg1); - goto do_binary_search; - } - SET_I((BeamInstr *) Arg(1)); - Goto(*I); - } - OpCase(set_tuple_element_sdP): { Eterm element; Eterm tuple; @@ -2936,15 +2941,17 @@ void process_main(void) the first argument. We also handle atom tags in the first argument for backwards compatibility. */ - GetArg2(0, tmp_arg1, tmp_arg2); - c_p->fvalue = tmp_arg2; + Eterm raise_val1; + Eterm raise_val2; + GetArg2(0, raise_val1, raise_val2); + c_p->fvalue = raise_val2; if (c_p->freason == EXC_NULL) { /* a safety check for the R10-0 case; should not happen */ c_p->ftrace = NIL; c_p->freason = EXC_ERROR; } /* for R10-0 code, keep existing c_p->ftrace and hope it's correct */ - switch (tmp_arg1) { + switch (raise_val1) { case am_throw: c_p->freason = EXC_THROWN & ~EXF_SAVETRACE; break; @@ -2960,8 +2967,8 @@ void process_main(void) passed from a user! Currently only expecting generated calls. */ struct StackTrace *s; - c_p->ftrace = tmp_arg1; - s = get_trace_from_exc(tmp_arg1); + c_p->ftrace = raise_val1; + s = get_trace_from_exc(raise_val1); if (s == NULL) { c_p->freason = EXC_ERROR; } else { @@ -2973,10 +2980,12 @@ void process_main(void) } OpCase(badmatch_s): { - GetArg1(0, tmp_arg1); - c_p->fvalue = tmp_arg1; - c_p->freason = BADMATCH; - } + Eterm badmatch_val; + + GetArg1(0, badmatch_val); + c_p->fvalue = badmatch_val; + c_p->freason = BADMATCH; + } /* Fall through here */ find_func_info: { @@ -3026,128 +3035,142 @@ void process_main(void) } } - OpCase(call_nif): - { - /* - * call_nif is always first instruction in function: - * - * I[-3]: Module - * I[-2]: Function - * I[-1]: Arity - * I[0]: &&call_nif - * I[1]: Function pointer to NIF function - * I[2]: Pointer to erl_module_nif - */ - BifFunction vbf; - - c_p->current = I-3; /* current and vbf set to please handle_error */ - SWAPOUT; - c_p->fcalls = FCALLS - 1; - PROCESS_MAIN_CHK_LOCKS(c_p); - tmp_arg2 = I[-1]; - ERTS_SMP_UNREQ_PROC_MAIN_LOCK(c_p); + { + Eterm nif_bif_result; + Eterm bif_nif_arity; - ASSERT(!ERTS_PROC_IS_EXITING(c_p)); - { - typedef Eterm NifF(struct enif_environment_t*, int argc, Eterm argv[]); - NifF* fp = vbf = (NifF*) I[1]; - struct enif_environment_t env; - erts_pre_nif(&env, c_p, (struct erl_module_nif*)I[2]); - reg[0] = r(0); - tmp_arg1 = (*fp)(&env, tmp_arg2, reg); - erts_post_nif(&env); - } - ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(tmp_arg1)); - PROCESS_MAIN_CHK_LOCKS(c_p); - goto apply_bif_or_nif_epilogue; - - OpCase(apply_bif): - /* - * At this point, I points to the code[3] in the export entry for - * the BIF: - * - * code[0]: Module - * code[1]: Function - * code[2]: Arity - * code[3]: &&apply_bif - * code[4]: Function pointer to BIF function - */ + OpCase(call_nif): + { + /* + * call_nif is always first instruction in function: + * + * I[-3]: Module + * I[-2]: Function + * I[-1]: Arity + * I[0]: &&call_nif + * I[1]: Function pointer to NIF function + * I[2]: Pointer to erl_module_nif + */ + BifFunction vbf; - c_p->current = I-3; /* In case we apply process_info/1,2 or load_nif/1 */ - c_p->i = I; /* In case we apply check_process_code/2. */ - c_p->arity = 0; /* To allow garbage collection on ourselves - * (check_process_code/2). - */ - SWAPOUT; - c_p->fcalls = FCALLS - 1; - vbf = (BifFunction) Arg(0); - PROCESS_MAIN_CHK_LOCKS(c_p); - tmp_arg2 = I[-1]; - ASSERT(tmp_arg2 <= 3); - ERTS_SMP_UNREQ_PROC_MAIN_LOCK(c_p); - switch (tmp_arg2) { - case 3: + c_p->current = I-3; /* current and vbf set to please handle_error */ + SWAPOUT; + c_p->fcalls = FCALLS - 1; + PROCESS_MAIN_CHK_LOCKS(c_p); + bif_nif_arity = I[-1]; + ERTS_SMP_UNREQ_PROC_MAIN_LOCK(c_p); + + ASSERT(!ERTS_PROC_IS_EXITING(c_p)); { - Eterm (*bf)(Process*, Eterm, Eterm, Eterm, BeamInstr*) = vbf; - ASSERT(!ERTS_PROC_IS_EXITING(c_p)); - tmp_arg1 = (*bf)(c_p, r(0), x(1), x(2), I); - ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(tmp_arg1)); - PROCESS_MAIN_CHK_LOCKS(c_p); + typedef Eterm NifF(struct enif_environment_t*, int argc, Eterm argv[]); + NifF* fp = vbf = (NifF*) I[1]; + struct enif_environment_t env; + erts_pre_nif(&env, c_p, (struct erl_module_nif*)I[2]); + reg[0] = r(0); + nif_bif_result = (*fp)(&env, bif_nif_arity, reg); + erts_post_nif(&env); } - break; - case 2: - { - Eterm (*bf)(Process*, Eterm, Eterm, BeamInstr*) = vbf; - ASSERT(!ERTS_PROC_IS_EXITING(c_p)); - tmp_arg1 = (*bf)(c_p, r(0), x(1), I); - ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(tmp_arg1)); - PROCESS_MAIN_CHK_LOCKS(c_p); + ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(nif_bif_result)); + PROCESS_MAIN_CHK_LOCKS(c_p); + goto apply_bif_or_nif_epilogue; + + OpCase(apply_bif): + /* + * At this point, I points to the code[3] in the export entry for + * the BIF: + * + * code[0]: Module + * code[1]: Function + * code[2]: Arity + * code[3]: &&apply_bif + * code[4]: Function pointer to BIF function + */ + + c_p->current = I-3; /* In case we apply process_info/1,2 or load_nif/1 */ + c_p->i = I; /* In case we apply check_process_code/2. */ + c_p->arity = 0; /* To allow garbage collection on ourselves + * (check_process_code/2). + */ + SWAPOUT; + c_p->fcalls = FCALLS - 1; + vbf = (BifFunction) Arg(0); + PROCESS_MAIN_CHK_LOCKS(c_p); + bif_nif_arity = I[-1]; + ASSERT(bif_nif_arity <= 3); + ERTS_SMP_UNREQ_PROC_MAIN_LOCK(c_p); + switch (bif_nif_arity) { + case 3: + { + Eterm (*bf)(Process*, Eterm, Eterm, Eterm, BeamInstr*) = vbf; + ASSERT(!ERTS_PROC_IS_EXITING(c_p)); + nif_bif_result = (*bf)(c_p, r(0), x(1), x(2), I); + ASSERT(!ERTS_PROC_IS_EXITING(c_p) || + is_non_value(nif_bif_result)); + PROCESS_MAIN_CHK_LOCKS(c_p); + } + break; + case 2: + { + Eterm (*bf)(Process*, Eterm, Eterm, BeamInstr*) = vbf; + ASSERT(!ERTS_PROC_IS_EXITING(c_p)); + nif_bif_result = (*bf)(c_p, r(0), x(1), I); + ASSERT(!ERTS_PROC_IS_EXITING(c_p) || + is_non_value(nif_bif_result)); + PROCESS_MAIN_CHK_LOCKS(c_p); + } + break; + case 1: + { + Eterm (*bf)(Process*, Eterm, BeamInstr*) = vbf; + ASSERT(!ERTS_PROC_IS_EXITING(c_p)); + nif_bif_result = (*bf)(c_p, r(0), I); + ASSERT(!ERTS_PROC_IS_EXITING(c_p) || + is_non_value(nif_bif_result)); + PROCESS_MAIN_CHK_LOCKS(c_p); + } + break; + case 0: + { + Eterm (*bf)(Process*, BeamInstr*) = vbf; + ASSERT(!ERTS_PROC_IS_EXITING(c_p)); + nif_bif_result = (*bf)(c_p, I); + ASSERT(!ERTS_PROC_IS_EXITING(c_p) || + is_non_value(nif_bif_result)); + PROCESS_MAIN_CHK_LOCKS(c_p); + break; + } + default: + erl_exit(1, "apply_bif: invalid arity: %u\n", + bif_nif_arity); } - break; - case 1: - { - Eterm (*bf)(Process*, Eterm, BeamInstr*) = vbf; - ASSERT(!ERTS_PROC_IS_EXITING(c_p)); - tmp_arg1 = (*bf)(c_p, r(0), I); - ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(tmp_arg1)); - PROCESS_MAIN_CHK_LOCKS(c_p); + + apply_bif_or_nif_epilogue: + ERTS_SMP_REQ_PROC_MAIN_LOCK(c_p); + ERTS_HOLE_CHECK(c_p); + if (c_p->mbuf) { + reg[0] = r(0); + nif_bif_result = erts_gc_after_bif_call(c_p, nif_bif_result, + reg, bif_nif_arity); + r(0) = reg[0]; } - break; - case 0: - { - Eterm (*bf)(Process*, BeamInstr*) = vbf; - ASSERT(!ERTS_PROC_IS_EXITING(c_p)); - tmp_arg1 = (*bf)(c_p, I); - ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(tmp_arg1)); - PROCESS_MAIN_CHK_LOCKS(c_p); - break; + SWAPIN; /* There might have been a garbage collection. */ + FCALLS = c_p->fcalls; + if (is_value(nif_bif_result)) { + r(0) = nif_bif_result; + CHECK_TERM(r(0)); + SET_I(c_p->cp); + Goto(*I); + } else if (c_p->freason == TRAP) { + SET_I(*((BeamInstr **) (UWord) ((c_p)->def_arg_reg + 3))); + r(0) = c_p->def_arg_reg[0]; + x(1) = c_p->def_arg_reg[1]; + x(2) = c_p->def_arg_reg[2]; + Dispatch(); } - } -apply_bif_or_nif_epilogue: - ERTS_SMP_REQ_PROC_MAIN_LOCK(c_p); - ERTS_HOLE_CHECK(c_p); - if (c_p->mbuf) { reg[0] = r(0); - tmp_arg1 = erts_gc_after_bif_call(c_p, tmp_arg1, reg, tmp_arg2); - r(0) = reg[0]; - } - SWAPIN; /* There might have been a garbage collection. */ - FCALLS = c_p->fcalls; - if (is_value(tmp_arg1)) { - r(0) = tmp_arg1; - CHECK_TERM(r(0)); - SET_I(c_p->cp); - Goto(*I); - } else if (c_p->freason == TRAP) { - SET_I(*((BeamInstr **) (UWord) ((c_p)->def_arg_reg + 3))); - r(0) = c_p->def_arg_reg[0]; - x(1) = c_p->def_arg_reg[1]; - x(2) = c_p->def_arg_reg[2]; - Dispatch(); + I = handle_error(c_p, c_p->cp, reg, vbf); + goto post_error_handling; } - reg[0] = r(0); - I = handle_error(c_p, c_p->cp, reg, vbf); - goto post_error_handling; } OpCase(i_get_sd): @@ -3161,10 +3184,14 @@ apply_bif_or_nif_epilogue: } OpCase(case_end_s): - GetArg1(0, tmp_arg1); - c_p->fvalue = tmp_arg1; - c_p->freason = EXC_CASE_CLAUSE; - goto find_func_info; + { + Eterm case_end_val; + + GetArg1(0, case_end_val); + c_p->fvalue = case_end_val; + c_p->freason = EXC_CASE_CLAUSE; + goto find_func_info; + } OpCase(if_end): c_p->freason = EXC_IF_CLAUSE; @@ -3177,10 +3204,13 @@ apply_bif_or_nif_epilogue: } OpCase(try_case_end_s): - GetArg1(0, tmp_arg1); - c_p->fvalue = tmp_arg1; - c_p->freason = EXC_TRY_CLAUSE; - goto find_func_info; + { + Eterm try_case_end_val; + GetArg1(0, try_case_end_val); + c_p->fvalue = try_case_end_val; + c_p->freason = EXC_TRY_CLAUSE; + goto find_func_info; + } /* * Construction of binaries using new instructions. @@ -3728,19 +3758,20 @@ apply_bif_or_nif_epilogue: Eterm header; BeamInstr *next; Uint slots; + Eterm context; OpCase(i_bs_start_match2_rfIId): { - tmp_arg1 = r(0); + context = r(0); do_start_match: slots = Arg(2); - if (!is_boxed(tmp_arg1)) { + if (!is_boxed(context)) { ClauseFail(); } PreFetch(4, next); - header = *boxed_val(tmp_arg1); + header = *boxed_val(context); if (header_is_bin_matchstate(header)) { - ErlBinMatchState* ms = (ErlBinMatchState *) boxed_val(tmp_arg1); + ErlBinMatchState* ms = (ErlBinMatchState *) boxed_val(context); Uint actual_slots = HEADER_NUM_SLOTS(header); ms->save_offset[0] = ms->mb.offset; if (actual_slots < slots) { @@ -3748,8 +3779,8 @@ apply_bif_or_nif_epilogue: Uint live = Arg(1); Uint wordsneeded = ERL_BIN_MATCHSTATE_SIZE(slots); - TestHeapPreserve(wordsneeded, live, tmp_arg1); - ms = (ErlBinMatchState *) boxed_val(tmp_arg1); + TestHeapPreserve(wordsneeded, live, context); + ms = (ErlBinMatchState *) boxed_val(context); dst = (ErlBinMatchState *) HTOP; *dst = *ms; *HTOP = HEADER_BIN_MATCHSTATE(slots); @@ -3761,12 +3792,12 @@ apply_bif_or_nif_epilogue: Eterm result; Uint live = Arg(1); Uint wordsneeded = ERL_BIN_MATCHSTATE_SIZE(slots); - TestHeapPreserve(wordsneeded, live, tmp_arg1); + TestHeapPreserve(wordsneeded, live, context); HEAP_TOP(c_p) = HTOP; #ifdef DEBUG c_p->stop = E; /* Needed for checking in HeapOnlyAlloc(). */ #endif - result = erts_bs_start_match_2(c_p, tmp_arg1, slots); + result = erts_bs_start_match_2(c_p, context, slots); HTOP = HEAP_TOP(c_p); HEAP_SPACE_VERIFIED(0); if (is_non_value(result)) { @@ -3780,12 +3811,12 @@ apply_bif_or_nif_epilogue: NextPF(4, next); } OpCase(i_bs_start_match2_xfIId): { - tmp_arg1 = xb(Arg(0)); + context = xb(Arg(0)); I++; goto do_start_match; } OpCase(i_bs_start_match2_yfIId): { - tmp_arg1 = yb(Arg(0)); + context = yb(Arg(0)); I++; goto do_start_match; } @@ -3878,93 +3909,105 @@ apply_bif_or_nif_epilogue: NextPF(2, next); } + { + Eterm bs_get_integer8_context; + OpCase(i_bs_get_integer_8_rfd): { - tmp_arg1 = r(0); - goto do_bs_get_integer_8; - } + bs_get_integer8_context = r(0); + goto do_bs_get_integer_8; + } OpCase(i_bs_get_integer_8_xfd): { - tmp_arg1 = xb(Arg(0)); - I++; - } + bs_get_integer8_context = xb(Arg(0)); + I++; + } do_bs_get_integer_8: { - ErlBinMatchBuffer *_mb; - Eterm _result; - _mb = ms_matchbuffer(tmp_arg1); - if (_mb->size - _mb->offset < 8) { - ClauseFail(); - } - if (BIT_OFFSET(_mb->offset) != 0) { - _result = erts_bs_get_integer_2(c_p, 8, 0, _mb); - } else { - _result = make_small(_mb->base[BYTE_OFFSET(_mb->offset)]); - _mb->offset += 8; + ErlBinMatchBuffer *_mb; + Eterm _result; + _mb = ms_matchbuffer(bs_get_integer8_context); + if (_mb->size - _mb->offset < 8) { + ClauseFail(); + } + if (BIT_OFFSET(_mb->offset) != 0) { + _result = erts_bs_get_integer_2(c_p, 8, 0, _mb); + } else { + _result = make_small(_mb->base[BYTE_OFFSET(_mb->offset)]); + _mb->offset += 8; + } + StoreBifResult(1, _result); } - StoreBifResult(1, _result); } - OpCase(i_bs_get_integer_16_rfd): { - tmp_arg1 = r(0); + { + Eterm bs_get_integer_16_context; + + OpCase(i_bs_get_integer_16_rfd): + bs_get_integer_16_context = r(0); goto do_bs_get_integer_16; - } - OpCase(i_bs_get_integer_16_xfd): { - tmp_arg1 = xb(Arg(0)); + OpCase(i_bs_get_integer_16_xfd): + bs_get_integer_16_context = xb(Arg(0)); I++; - } - do_bs_get_integer_16: { - ErlBinMatchBuffer *_mb; - Eterm _result; - _mb = ms_matchbuffer(tmp_arg1); - if (_mb->size - _mb->offset < 16) { - ClauseFail(); - } - if (BIT_OFFSET(_mb->offset) != 0) { - _result = erts_bs_get_integer_2(c_p, 16, 0, _mb); - } else { - _result = make_small(get_int16(_mb->base+BYTE_OFFSET(_mb->offset))); - _mb->offset += 16; + do_bs_get_integer_16: + { + ErlBinMatchBuffer *_mb; + Eterm _result; + _mb = ms_matchbuffer(bs_get_integer_16_context); + if (_mb->size - _mb->offset < 16) { + ClauseFail(); + } + if (BIT_OFFSET(_mb->offset) != 0) { + _result = erts_bs_get_integer_2(c_p, 16, 0, _mb); + } else { + _result = make_small(get_int16(_mb->base+BYTE_OFFSET(_mb->offset))); + _mb->offset += 16; + } + StoreBifResult(1, _result); } - StoreBifResult(1, _result); } - OpCase(i_bs_get_integer_32_rfId): { - tmp_arg1 = r(0); + { + Eterm bs_get_integer_32_context; + + OpCase(i_bs_get_integer_32_rfId): + bs_get_integer_32_context = r(0); goto do_bs_get_integer_32; - } + - OpCase(i_bs_get_integer_32_xfId): { - tmp_arg1 = xb(Arg(0)); + OpCase(i_bs_get_integer_32_xfId): + bs_get_integer_32_context = xb(Arg(0)); I++; - } - do_bs_get_integer_32: { - ErlBinMatchBuffer *_mb; - Uint32 _integer; - Eterm _result; - _mb = ms_matchbuffer(tmp_arg1); - if (_mb->size - _mb->offset < 32) { ClauseFail(); } - if (BIT_OFFSET(_mb->offset) != 0) { - _integer = erts_bs_get_unaligned_uint32(_mb); - } else { - _integer = get_int32(_mb->base + _mb->offset/8); - } - _mb->offset += 32; + + do_bs_get_integer_32: + { + ErlBinMatchBuffer *_mb; + Uint32 _integer; + Eterm _result; + _mb = ms_matchbuffer(bs_get_integer_32_context); + if (_mb->size - _mb->offset < 32) { ClauseFail(); } + if (BIT_OFFSET(_mb->offset) != 0) { + _integer = erts_bs_get_unaligned_uint32(_mb); + } else { + _integer = get_int32(_mb->base + _mb->offset/8); + } + _mb->offset += 32; #if !defined(ARCH_64) || HALFWORD_HEAP - if (IS_USMALL(0, _integer)) { + if (IS_USMALL(0, _integer)) { #endif - _result = make_small(_integer); + _result = make_small(_integer); #if !defined(ARCH_64) || HALFWORD_HEAP - } else { - TestHeap(BIG_UINT_HEAP_SIZE, Arg(1)); - _result = uint_to_big((Uint) _integer, HTOP); - HTOP += BIG_UINT_HEAP_SIZE; - HEAP_SPACE_VERIFIED(0); - } + } else { + TestHeap(BIG_UINT_HEAP_SIZE, Arg(1)); + _result = uint_to_big((Uint) _integer, HTOP); + HTOP += BIG_UINT_HEAP_SIZE; + HEAP_SPACE_VERIFIED(0); + } #endif - StoreBifResult(2, _result); + StoreBifResult(2, _result); + } } /* Operands: Size Live Fail Flags Dst */ @@ -4062,54 +4105,64 @@ apply_bif_or_nif_epilogue: StoreBifResult(3, result); } - /* Operands: MatchContext Fail Dst */ + { + Eterm get_utf8_context; + + /* Operands: MatchContext Fail Dst */ OpCase(i_bs_get_utf8_rfd): { - tmp_arg1 = r(0); - goto do_bs_get_utf8; - } + get_utf8_context = r(0); + goto do_bs_get_utf8; + } OpCase(i_bs_get_utf8_xfd): { - tmp_arg1 = xb(Arg(0)); - I++; - } + get_utf8_context = xb(Arg(0)); + I++; + } - /* - * tmp_arg1 = match_context - * Operands: Fail Dst - */ + /* + * get_utf8_context = match_context + * Operands: Fail Dst + */ - do_bs_get_utf8: { - Eterm result = erts_bs_get_utf8(ms_matchbuffer(tmp_arg1)); - if (is_non_value(result)) { - ClauseFail(); + do_bs_get_utf8: { + Eterm result = erts_bs_get_utf8(ms_matchbuffer(get_utf8_context)); + if (is_non_value(result)) { + ClauseFail(); + } + StoreBifResult(1, result); } - StoreBifResult(1, result); } - /* Operands: MatchContext Fail Flags Dst */ + { + Eterm get_utf16_context; + + /* Operands: MatchContext Fail Flags Dst */ OpCase(i_bs_get_utf16_rfId): { - tmp_arg1 = r(0); - goto do_bs_get_utf16; - } + get_utf16_context = r(0); + goto do_bs_get_utf16; + } OpCase(i_bs_get_utf16_xfId): { - tmp_arg1 = xb(Arg(0)); - I++; - } + get_utf16_context = xb(Arg(0)); + I++; + } - /* - * tmp_arg1 = match_context - * Operands: Fail Flags Dst - */ - do_bs_get_utf16: { - Eterm result = erts_bs_get_utf16(ms_matchbuffer(tmp_arg1), Arg(1)); - if (is_non_value(result)) { - ClauseFail(); + /* + * get_utf16_context = match_context + * Operands: Fail Flags Dst + */ + do_bs_get_utf16: { + Eterm result = erts_bs_get_utf16(ms_matchbuffer(get_utf16_context), + Arg(1)); + if (is_non_value(result)) { + ClauseFail(); + } + StoreBifResult(2, result); } - StoreBifResult(2, result); } { + Eterm context_to_binary_context; ErlBinMatchBuffer* mb; ErlSubBin* sb; Uint size; @@ -4118,27 +4171,29 @@ apply_bif_or_nif_epilogue: Uint hole_size; OpCase(bs_context_to_binary_r): { - tmp_arg1 = x0; + context_to_binary_context = x0; I -= 2; goto do_context_to_binary; } /* Unfortunately, inlining can generate this instruction. */ OpCase(bs_context_to_binary_y): { - tmp_arg1 = yb(Arg(0)); + context_to_binary_context = yb(Arg(0)); goto do_context_to_binary0; } OpCase(bs_context_to_binary_x): { - tmp_arg1 = xb(Arg(0)); + context_to_binary_context = xb(Arg(0)); do_context_to_binary0: I--; } do_context_to_binary: - if (is_boxed(tmp_arg1) && header_is_bin_matchstate(*boxed_val(tmp_arg1))) { - ErlBinMatchState* ms = (ErlBinMatchState *) boxed_val(tmp_arg1); + if (is_boxed(context_to_binary_context) && + header_is_bin_matchstate(*boxed_val(context_to_binary_context))) { + ErlBinMatchState* ms; + ms = (ErlBinMatchState *) boxed_val(context_to_binary_context); mb = &ms->mb; offs = ms->save_offset[0]; size = mb->size - offs; @@ -4147,17 +4202,17 @@ apply_bif_or_nif_epilogue: Next(2); OpCase(i_bs_get_binary_all_reuse_rfI): { - tmp_arg1 = x0; + context_to_binary_context = x0; goto do_bs_get_binary_all_reuse; } OpCase(i_bs_get_binary_all_reuse_xfI): { - tmp_arg1 = xb(Arg(0)); + context_to_binary_context = xb(Arg(0)); I++; } do_bs_get_binary_all_reuse: - mb = ms_matchbuffer(tmp_arg1); + mb = ms_matchbuffer(context_to_binary_context); size = mb->size - mb->offset; if (size % Arg(1) != 0) { ClauseFail(); @@ -4166,7 +4221,7 @@ apply_bif_or_nif_epilogue: do_bs_get_binary_all_reuse_common: orig = mb->orig; - sb = (ErlSubBin *) boxed_val(tmp_arg1); + sb = (ErlSubBin *) boxed_val(context_to_binary_context); hole_size = 1 + header_arity(sb->thing_word) - ERL_SUB_BIN_SIZE; sb->thing_word = HEADER_SUB_BIN; sb->size = BYTE_OFFSET(size); @@ -4182,12 +4237,14 @@ apply_bif_or_nif_epilogue: } { + Eterm match_string_context; + OpCase(i_bs_match_string_rfII): { - tmp_arg1 = r(0); + match_string_context = r(0); goto do_bs_match_string; } OpCase(i_bs_match_string_xfII): { - tmp_arg1 = xb(Arg(0)); + match_string_context = xb(Arg(0)); I++; } @@ -4202,7 +4259,7 @@ apply_bif_or_nif_epilogue: PreFetch(3, next); bits = Arg(1); bytes = (byte *) Arg(2); - mb = ms_matchbuffer(tmp_arg1); + mb = ms_matchbuffer(match_string_context); if (mb->size - mb->offset < bits) { ClauseFail(); } -- cgit v1.2.3 From 55eef00e9e0331bddd277194509c6094e8306211 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 19 Nov 2010 16:46:58 +0100 Subject: Eliminate use of GetArg1() in the select_val instruction Instead of having one i_select_val_sfI instruction that uses the GetArg1() macro to fetch the controlling expression, use three separate instructions for each of the register types. That will save one word when selecting on the {x,0} register. It should also be slightly faster since a conditional branch is eliminated. Although it seems that the BEAM compiler will never generate a constant controlling expression (even with optimizations turned off), we still make sure that they will work by evaluating the select_val instruction at load time. Handle the select_tuple_arity instruction in the same way. --- erts/emulator/beam/beam_debug.c | 8 ++++++-- erts/emulator/beam/beam_emu.c | 26 ++++++++++++++++++++++---- erts/emulator/beam/beam_load.c | 33 +++++++++++++++++++++++---------- erts/emulator/beam/ops.tab | 20 ++++++++++++-------- 4 files changed, 63 insertions(+), 24 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index 6c16c24d0d..46b831fa88 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -555,7 +555,9 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) unpacked = ap; ap = addr + size; switch (op) { - case op_i_select_val_sfI: + case op_i_select_val_rfI: + case op_i_select_val_xfI: + case op_i_select_val_yfI: { int n = ap[-1]; @@ -567,7 +569,9 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) } } break; - case op_i_select_tuple_arity_sfI: + case op_i_select_tuple_arity_rfI: + case op_i_select_tuple_arity_xfI: + case op_i_select_tuple_arity_yfI: { int n = ap[-1]; diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 09f6fc8cef..6cd7fa78e0 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1865,9 +1865,19 @@ void process_main(void) { Eterm select_val; - OpCase(i_select_tuple_arity_sfI): - GetArg1(0, select_val); + OpCase(i_select_tuple_arity_xfI): + select_val = xb(Arg(0)); + goto do_select_tuple_arity; + OpCase(i_select_tuple_arity_yfI): + select_val = yb(Arg(0)); + goto do_select_tuple_arity; + + OpCase(i_select_tuple_arity_rfI): + select_val = r(0); + I--; + + do_select_tuple_arity: if (is_tuple(select_val)) { select_val = *tuple_val(select_val); goto do_binary_search; @@ -1875,9 +1885,17 @@ void process_main(void) SET_I((BeamInstr *) Arg(1)); Goto(*I); + OpCase(i_select_val_xfI): + select_val = xb(Arg(0)); + goto do_binary_search; - OpCase(i_select_val_sfI): - GetArg1(0, select_val); + OpCase(i_select_val_yfI): + select_val = yb(Arg(0)); + goto do_binary_search; + + OpCase(i_select_val_rfI): + select_val = r(0); + I--; do_binary_search: { diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 73f057929e..54c8ad0eb1 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -3282,7 +3282,6 @@ const_select_val(LoaderState* stp, GenOpArg S, GenOpArg Fail, int i; ASSERT(Size.type == TAG_u); - ASSERT(S.type == TAG_q); NEW_GENOP(stp, op); op->next = NULL; @@ -3293,18 +3292,32 @@ const_select_val(LoaderState* stp, GenOpArg S, GenOpArg Fail, * Search for a literal matching the controlling expression. */ - if (S.type == TAG_q) { - Eterm expr = stp->literals[S.val].term; - for (i = 0; i < Size.val; i += 2) { - if (Rest[i].type == TAG_q) { - Eterm term = stp->literals[Rest[i].val].term; - if (eq(term, expr)) { - ASSERT(Rest[i+1].type == TAG_f); - op->a[0] = Rest[i+1]; - return op; + switch (S.type) { + case TAG_q: + { + Eterm expr = stp->literals[S.val].term; + for (i = 0; i < Size.val; i += 2) { + if (Rest[i].type == TAG_q) { + Eterm term = stp->literals[Rest[i].val].term; + if (eq(term, expr)) { + ASSERT(Rest[i+1].type == TAG_f); + op->a[0] = Rest[i+1]; + return op; + } } } } + break; + case TAG_i: + case TAG_a: + for (i = 0; i < Size.val; i += 2) { + if (Rest[i].val == S.val && Rest[i].type == S.type) { + ASSERT(Rest[i+1].type == TAG_f); + op->a[0] = Rest[i+1]; + return op; + } + } + break; } /* diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index f629cc80b6..45b2e197b4 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -124,7 +124,7 @@ init Y1 | init Y2 => init2 Y1 Y2 # Selecting values -select_val S=q Fail=f Size=u Rest=* => const_select_val(S, Fail, Size, Rest) +select_val S=aiq Fail=f Size=u Rest=* => const_select_val(S, Fail, Size, Rest) select_val S=s Fail=f Size=u Rest=* | use_jump_tab(Size, Rest) => \ gen_jump_tab(S, Fail, Size, Rest) @@ -139,26 +139,30 @@ is_integer TypeFail=f S | select_val S=s Fail=f Size=u Rest=* | \ select_val S=s Fail=f Size=u Rest=* | mixed_types(Size, Rest) => \ gen_split_values(S, Fail, Fail, Size, Rest) -is_integer Fail=f S | select_val S=s Fail=f Size=u Rest=* | \ +is_integer Fail=f S | select_val S=d Fail=f Size=u Rest=* | \ fixed_size_values(Size, Rest) => gen_select_val(S, Fail, Size, Rest) -is_atom Fail=f S | select_val S=s Fail=f Size=u Rest=* | \ +is_atom Fail=f S | select_val S=d Fail=f Size=u Rest=* | \ fixed_size_values(Size, Rest) => gen_select_val(S, Fail, Size, Rest) select_val S=s Fail=f Size=u Rest=* | floats_or_bignums(Size, Rest) => \ gen_select_literals(S, Fail, Size, Rest) -select_val S=s Fail=f Size=u Rest=* | fixed_size_values(Size, Rest) => \ +select_val S=d Fail=f Size=u Rest=* | fixed_size_values(Size, Rest) => \ gen_select_val(S, Fail, Size, Rest) -is_tuple Fail=f S | select_tuple_arity S=s Fail=f Size=u Rest=* => \ +is_tuple Fail=f S | select_tuple_arity S=d Fail=f Size=u Rest=* => \ gen_select_tuple_arity(S, Fail, Size, Rest) -select_tuple_arity S=s Fail=f Size=u Rest=* => \ +select_tuple_arity S=d Fail=f Size=u Rest=* => \ gen_select_tuple_arity(S, Fail, Size, Rest) -i_select_val s f I -i_select_tuple_arity s f I +i_select_val r f I +i_select_val x f I +i_select_val y f I +i_select_tuple_arity r f I +i_select_tuple_arity x f I +i_select_tuple_arity y f I i_jump_on_val_zero s f I i_jump_on_val s f I I -- cgit v1.2.3 From 2233398336a35c7160e82efa8ee218a4277739cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 7 Dec 2010 14:34:28 +0100 Subject: Eliminate use of GetArg1() in the jump_on_val* instructions --- erts/emulator/beam/beam_debug.c | 8 +++++-- erts/emulator/beam/beam_emu.c | 51 ++++++++++++++++++++++++++++++----------- erts/emulator/beam/ops.tab | 9 ++++++-- 3 files changed, 50 insertions(+), 18 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index 46b831fa88..2855241b91 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -584,7 +584,9 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) } } break; - case op_i_jump_on_val_sfII: + case op_i_jump_on_val_rfII: + case op_i_jump_on_val_xfII: + case op_i_jump_on_val_yfII: { int n; for (n = ap[-2]; n > 0; n--) { @@ -594,7 +596,9 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) } } break; - case op_i_jump_on_val_zero_sfI: + case op_i_jump_on_val_zero_rfI: + case op_i_jump_on_val_zero_xfI: + case op_i_jump_on_val_zero_yfI: { int n; for (n = ap[-1]; n > 0; n--) { diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 6cd7fa78e0..1b9fc86871 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1946,15 +1946,26 @@ void process_main(void) } } - OpCase(i_jump_on_val_zero_sfI): { - Eterm index; + Eterm jump_on_val_zero_index; + + OpCase(i_jump_on_val_zero_yfI): + jump_on_val_zero_index = yb(Arg(0)); + goto do_jump_on_val_zero_index; + + OpCase(i_jump_on_val_zero_xfI): + jump_on_val_zero_index = xb(Arg(0)); + goto do_jump_on_val_zero_index; - GetArg1(0, index); - if (is_small(index)) { - index = signed_val(index); - if (index < Arg(2)) { - SET_I((BeamInstr *) (&Arg(3))[index]); + OpCase(i_jump_on_val_zero_rfI): + jump_on_val_zero_index = r(0); + I--; + + do_jump_on_val_zero_index: + if (is_small(jump_on_val_zero_index)) { + jump_on_val_zero_index = signed_val(jump_on_val_zero_index); + if (jump_on_val_zero_index < Arg(2)) { + SET_I((BeamInstr *) (&Arg(3))[jump_on_val_zero_index]); Goto(*I); } } @@ -1962,15 +1973,27 @@ void process_main(void) Goto(*I); } - OpCase(i_jump_on_val_sfII): { - Eterm index; + Eterm jump_on_val_index; + + + OpCase(i_jump_on_val_yfII): + jump_on_val_index = yb(Arg(0)); + goto do_jump_on_val_index; + + OpCase(i_jump_on_val_xfII): + jump_on_val_index = xb(Arg(0)); + goto do_jump_on_val_index; + + OpCase(i_jump_on_val_rfII): + jump_on_val_index = r(0); + I--; - GetArg1(0, index); - if (is_small(index)) { - index = (Uint) (signed_val(index) - Arg(3)); - if (index < Arg(2)) { - SET_I((BeamInstr *) (&Arg(4))[index]); + do_jump_on_val_index: + if (is_small(jump_on_val_index)) { + jump_on_val_index = (Uint) (signed_val(jump_on_val_index) - Arg(3)); + if (jump_on_val_index < Arg(2)) { + SET_I((BeamInstr *) (&Arg(4))[jump_on_val_index]); Goto(*I); } } diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 45b2e197b4..f158fa9543 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -164,8 +164,13 @@ i_select_tuple_arity r f I i_select_tuple_arity x f I i_select_tuple_arity y f I -i_jump_on_val_zero s f I -i_jump_on_val s f I I +i_jump_on_val_zero r f I +i_jump_on_val_zero x f I +i_jump_on_val_zero y f I + +i_jump_on_val r f I I +i_jump_on_val x f I I +i_jump_on_val y f I I jump Target | label Lbl | same_label(Target, Lbl) => label Lbl -- cgit v1.2.3 From ec2fcc7aefec2f4ede4f789098f5093cd2fe00b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 7 Dec 2010 11:09:51 +0100 Subject: Eliminate use of GetArg1() in the fast_element instruction Use separate instructions for each register type. --- erts/emulator/beam/beam_emu.c | 42 +++++++++++++++++++++++++----------------- erts/emulator/beam/beam_load.c | 22 +++++++++++----------- erts/emulator/beam/ops.tab | 5 ++++- 3 files changed, 40 insertions(+), 29 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 1b9fc86871..bdc730e6c1 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1492,24 +1492,32 @@ void process_main(void) c_p->freason = BADARG; goto lb_Cl_error; - OpCase(i_fast_element_jIsd): { - Eterm tuple; - - /* - * Inlined version of element/2 for even more speed. - * The first argument is an untagged integer >= 1. - * The second argument is guaranteed to be a register operand. - */ - GetArg1(2, tuple); - if (is_tuple(tuple)) { - Eterm* tp = tuple_val(tuple); - Eterm pos = Arg(1); - if (pos <= arityval(*tp)) { - Eterm result = tp[pos]; - StoreBifResult(3, result); - } - } + { + Eterm fast_element_tuple; + + OpCase(i_fast_element_rjId): + fast_element_tuple = r(0); + + do_fast_element: + if (is_tuple(fast_element_tuple)) { + Eterm* tp = tuple_val(fast_element_tuple); + Eterm pos = Arg(1); /* Untagged integer >= 1 */ + if (pos <= arityval(*tp)) { + Eterm result = tp[pos]; + StoreBifResult(2, result); + } + } goto badarg; + + OpCase(i_fast_element_xjId): + fast_element_tuple = xb(Arg(0)); + I++; + goto do_fast_element; + + OpCase(i_fast_element_yjId): + fast_element_tuple = yb(Arg(0)); + I++; + goto do_fast_element; } OpCase(catch_yf): diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 54c8ad0eb1..4233e26f54 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2308,23 +2308,23 @@ gen_element(LoaderState* stp, GenOpArg Fail, GenOpArg Index, GenOp* op; NEW_GENOP(stp, op); - op->op = genop_i_element_4; op->arity = 4; - op->a[0] = Fail; - op->a[1] = Index; - op->a[2] = Tuple; - op->a[3] = Dst; op->next = NULL; - /* - * If safe, generate a faster instruction. - */ - if (Index.type == TAG_i && Index.val > 0 && (Tuple.type == TAG_r || Tuple.type == TAG_x || Tuple.type == TAG_y)) { op->op = genop_i_fast_element_4; - op->a[1].type = TAG_u; - op->a[1].val = Index.val; + op->a[0] = Tuple; + op->a[1] = Fail; + op->a[2].type = TAG_u; + op->a[2].val = Index.val; + op->a[3] = Dst; + } else { + op->op = genop_i_element_4; + op->a[0] = Fail; + op->a[1] = Index; + op->a[2] = Tuple; + op->a[3] = Dst; } return op; diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index f158fa9543..491f8f1d90 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -920,7 +920,10 @@ node x node y %hot -i_fast_element j I s d +i_fast_element r j I d +i_fast_element x j I d +i_fast_element y j I d + i_element j s s d bif1 f b s d -- cgit v1.2.3 From 3694076f5a68cc78e9ccd6c67651bc0f761fc94f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 7 Dec 2010 13:57:24 +0100 Subject: Eliminate use of GetArg2() in the i_element instruction Use separate instructions for each register type. --- erts/emulator/beam/beam_emu.c | 48 ++++++++++++++++++++++++++---------------- erts/emulator/beam/beam_load.c | 6 +++--- erts/emulator/beam/ops.tab | 4 +++- 3 files changed, 36 insertions(+), 22 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index bdc730e6c1..fdb7d86c78 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1467,24 +1467,36 @@ void process_main(void) goto find_func_info; } - OpCase(i_element_jssd): { - Eterm index; - Eterm tuple; - - /* - * Inlined version of element/2 for speed. - */ - GetArg2(1, index, tuple); - if (is_small(index) && is_tuple(tuple)) { - Eterm* tp = tuple_val(tuple); - - if ((signed_val(index) >= 1) && - (signed_val(index) <= arityval(*tp))) { - Eterm result = tp[signed_val(index)]; - StoreBifResult(3, result); - } - } - } + { + Eterm element_index; + Eterm element_tuple; + + OpCase(i_element_xjsd): + element_tuple = xb(Arg(0)); + I++; + goto do_element; + + OpCase(i_element_yjsd): + element_tuple = yb(Arg(0)); + I++; + goto do_element; + + OpCase(i_element_rjsd): + element_tuple = r(0); + /* Fall through */ + + do_element: + GetArg1(1, element_index); + if (is_small(element_index) && is_tuple(element_tuple)) { + Eterm* tp = tuple_val(element_tuple); + + if ((signed_val(element_index) >= 1) && + (signed_val(element_index) <= arityval(*tp))) { + Eterm result = tp[signed_val(element_index)]; + StoreBifResult(2, result); + } + } + } /* Fall through */ OpCase(badarg_j): diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 4233e26f54..02aef95a52 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2321,9 +2321,9 @@ gen_element(LoaderState* stp, GenOpArg Fail, GenOpArg Index, op->a[3] = Dst; } else { op->op = genop_i_element_4; - op->a[0] = Fail; - op->a[1] = Index; - op->a[2] = Tuple; + op->a[0] = Tuple; + op->a[1] = Fail; + op->a[2] = Index; op->a[3] = Dst; } diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 491f8f1d90..edc44f966b 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -924,7 +924,9 @@ i_fast_element r j I d i_fast_element x j I d i_fast_element y j I d -i_element j s s d +i_element r j s d +i_element x j s d +i_element y j s d bif1 f b s d bif1_body b s d -- cgit v1.2.3 From 5cf1e739a5599943c1ac1d40d0577f83f5e0e62b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 9 Dec 2010 20:57:27 +0100 Subject: Eliminate use of GetArg1() in the badmatch and case_end instructions Create separate instructions for each register type. The "badmatch x(0)" and "case_end x(0)" (which are very common) will only require a single word each, compared to two words when GetArg1() is used. --- erts/emulator/beam/beam_emu.c | 30 ++++++++++++++++++++++++++---- erts/emulator/beam/ops.tab | 14 ++++++++++---- 2 files changed, 36 insertions(+), 8 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index fdb7d86c78..ea813ef09f 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -3040,10 +3040,21 @@ void process_main(void) goto find_func_info; } - OpCase(badmatch_s): { + { Eterm badmatch_val; - GetArg1(0, badmatch_val); + OpCase(badmatch_y): + badmatch_val = yb(Arg(0)); + goto do_badmatch; + + OpCase(badmatch_x): + badmatch_val = xb(Arg(0)); + goto do_badmatch; + + OpCase(badmatch_r): + badmatch_val = r(0); + + do_badmatch: c_p->fvalue = badmatch_val; c_p->freason = BADMATCH; } @@ -3244,11 +3255,22 @@ void process_main(void) StoreBifResult(1, result); } - OpCase(case_end_s): { Eterm case_end_val; - GetArg1(0, case_end_val); + OpCase(case_end_x): + case_end_val = xb(Arg(0)); + goto do_case_end; + + OpCase(case_end_y): + case_end_val = yb(Arg(0)); + goto do_case_end; + + OpCase(case_end_r): + case_end_val = r(0); + I--; + + do_case_end: c_p->fvalue = case_end_val; c_p->freason = EXC_CASE_CLAUSE; goto find_func_info; diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index edc44f966b..c10b3e8d52 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -250,11 +250,17 @@ is_number Fail Literal=q => move Literal x | is_number Fail x jump f -case_end Literal=q => move Literal x | case_end x -badmatch Literal=q => move Literal x | badmatch x +case_end Literal=cq => move Literal x | case_end x +badmatch Literal=cq => move Literal x | badmatch x + +case_end r +case_end x +case_end y + +badmatch r +badmatch x +badmatch y -case_end s -badmatch s if_end raise s s -- cgit v1.2.3 From d5ab5485430c00734a79068fe0eee7bd4f87cf00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 9 Dec 2010 21:31:08 +0100 Subject: Eliminate the specific move_sd instruction The move_sd specific instruction is no longer used since there are specific move instructions covering all possible permutations of operands. Also eliminate the move_cy instruction because it is almost never generated by the compiler. --- erts/emulator/beam/ops.tab | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index c10b3e8d52..0b2e2dca83 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -279,6 +279,11 @@ move2 x y x y move2 y x y x move2 x x x x +# The compiler almost never generates a "move Literal y(Y)" instruction, +# so let's cheat if we encounter one. +move S=n D=y => init D +move S=c D=y => move S x | move x D + %macro:move Move -pack -gen_dest move x x move x y @@ -289,15 +294,10 @@ move r x move r y move c r move c x -move c y move n x move n r move y y -%cold -move s d -%hot - # Receive operations. loop_rec Fail Src | smp_mark_target_label(Fail) => i_loop_rec Fail Src -- cgit v1.2.3 From 92f1409c0e9b986780be483976de3cbbff3bab34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 16 Dec 2010 11:15:11 +0100 Subject: Eliminate the "put_list c n Dst" instructions Since the literal (constant) pool was introduced in R12, the BEAM compiler will never generate a "put_list Const [] Dst" instruction (it will instead generate a "move [Const] Dst" instruction). --- erts/emulator/beam/ops.tab | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 0b2e2dca83..426cdef24d 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -376,16 +376,21 @@ i_put_tuple r I i_put_tuple x I i_put_tuple y I +# +# The instruction "put_list Const [] Dst" will not be generated by +# the current BEAM compiler. But until R15A, play it safe by handling +# that instruction with the following transformation. +# +put_list Const=c n Dst => move Const x | put_list x n Dst + %macro:put_list PutList -pack -gen_dest put_list x n x put_list y n x put_list x x x put_list y x x -put_list c n x put_list x x r put_list y r r -put_list c n r put_list y y x put_list x y x -- cgit v1.2.3 From bc0a998391ce2720f2c6099c809a45094d099fbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 9 Dec 2010 20:39:36 +0100 Subject: Introduce a few more specialized put_list instructions --- erts/emulator/beam/ops.tab | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 426cdef24d..04e2e72b0d 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -401,6 +401,13 @@ put_list y y r put_list y r x put_list r n x +put_list x r x +put_list x y r +put_list y x r +put_list y x x + +put_list x r r + # put_list SrcReg Constant Dst put_list r c r put_list r c x @@ -428,7 +435,6 @@ put_list c y x put_list c y y %cold -put_list x r r put_list s s d %hot -- cgit v1.2.3 From b166f8975387d7ef07409e841d45c2cd74c2282e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 7 Dec 2010 16:04:05 +0100 Subject: Introduce a special instruction for select_val with two values The new instruction will save one word (because no size operand is needed), and is slightly faster. Handle select_tuple_arity in the same way. --- erts/emulator/beam/beam_emu.c | 47 ++++++++++++++++++++++++++++++++++++++++++ erts/emulator/beam/beam_load.c | 27 ++++++++++++++++++++++++ erts/emulator/beam/ops.tab | 9 ++++++++ 3 files changed, 83 insertions(+) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index ea813ef09f..afba17f7bd 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1882,6 +1882,53 @@ void process_main(void) NextPF(0, next); } + + { + Eterm select_val2; + + OpCase(i_select_tuple_arity2_yfAfAf): + select_val2 = yb(Arg(0)); + goto do_select_tuple_arity2; + + OpCase(i_select_tuple_arity2_xfAfAf): + select_val2 = xb(Arg(0)); + goto do_select_tuple_arity2; + + OpCase(i_select_tuple_arity2_rfAfAf): + select_val2 = r(0); + I--; + + do_select_tuple_arity2: + if (is_not_tuple(select_val2)) { + goto select_val2_fail; + } + select_val2 = *tuple_val(select_val2); + goto do_select_val2; + + OpCase(i_select_val2_yfcfcf): + select_val2 = yb(Arg(0)); + goto do_select_val2; + + OpCase(i_select_val2_xfcfcf): + select_val2 = xb(Arg(0)); + goto do_select_val2; + + OpCase(i_select_val2_rfcfcf): + select_val2 = r(0); + I--; + + do_select_val2: + if (select_val2 == Arg(2)) { + I += 2; + } else if (select_val2 == Arg(4)) { + I += 4; + } + + select_val2_fail: + SET_I((BeamInstr *) Arg(1)); + Goto(*I); + } + { Eterm select_val; diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 02aef95a52..2d57c8a218 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2978,6 +2978,21 @@ gen_select_tuple_arity(LoaderState* stp, GenOpArg S, GenOpArg Fail, ASSERT(op->a[i].val < op->a[i+2].val); } #endif + + /* + * Use a special-cased instruction if there are only two values. + */ + if (size == 2) { + op->op = genop_i_select_tuple_arity2_6; + op->arity--; + op->a[2].type = TAG_u; + op->a[2].val = arityval(op->a[3].val); + op->a[3] = op->a[4]; + op->a[4].type = TAG_u; + op->a[4].val = arityval(op->a[5].val); + op->a[5] = op->a[6]; + } + return op; } @@ -3228,6 +3243,18 @@ gen_select_val(LoaderState* stp, GenOpArg S, GenOpArg Fail, } #endif + /* + * Use a special-cased instruction if there are only two values. + */ + if (size == 2) { + op->op = genop_i_select_val2_6; + op->arity--; + op->a[2] = op->a[3]; + op->a[3] = op->a[4]; + op->a[4] = op->a[5]; + op->a[5] = op->a[6]; + } + return op; } diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 04e2e72b0d..6db55e1402 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -160,6 +160,15 @@ select_tuple_arity S=d Fail=f Size=u Rest=* => \ i_select_val r f I i_select_val x f I i_select_val y f I + +i_select_val2 r f c f c f +i_select_val2 x f c f c f +i_select_val2 y f c f c f + +i_select_tuple_arity2 r f A f A f +i_select_tuple_arity2 x f A f A f +i_select_tuple_arity2 y f A f A f + i_select_tuple_arity r f I i_select_tuple_arity x f I i_select_tuple_arity y f I -- cgit v1.2.3 From 74d7bd100b742a803c8de82c9c997308cf871038 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 8 Dec 2010 15:27:37 +0100 Subject: Optimize addition of a small integer to a variable Introduce a new i_increment/4 to optimize the addition of a register and a small integer. This instruction saves two instruction words compared to the standard instructions (an i_fetch/2 instruction followed by a i_plus/3 instruction) and will also be slightly faster. --- erts/emulator/beam/beam_emu.c | 46 ++++++++++++++++++++++++++++++++++++++++++ erts/emulator/beam/beam_load.c | 46 ++++++++++++++++++++++++++++++++++++++++++ erts/emulator/beam/ops.tab | 20 +++++++++++++++++- 3 files changed, 111 insertions(+), 1 deletion(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index afba17f7bd..741ba8fb93 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1276,6 +1276,52 @@ void process_main(void) #define STORE_ARITH_RESULT(res) StoreBifResult(2, (res)); #define ARITH_FUNC(name) erts_gc_##name + { + Eterm increment_reg_val; + Eterm increment_val; + Uint live; + Eterm result; + + OpCase(i_increment_yIId): + increment_reg_val = yb(Arg(0)); + goto do_increment; + + OpCase(i_increment_xIId): + increment_reg_val = xb(Arg(0)); + goto do_increment; + + OpCase(i_increment_rIId): + increment_reg_val = r(0); + I--; + + do_increment: + increment_val = Arg(1); + if (is_small(increment_reg_val)) { + Sint i = signed_val(increment_reg_val) + increment_val; + ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i)); + if (MY_IS_SSMALL(i)) { + result = make_small(i); + store_result: + StoreBifResult(3, result); + } + } + + live = Arg(2); + SWAPOUT; + reg[0] = r(0); + reg[live] = increment_reg_val; + reg[live+1] = make_small(increment_val); + result = erts_gc_mixed_plus(c_p, reg, live); + r(0) = reg[0]; + SWAPIN; + ERTS_HOLE_CHECK(c_p); + if (is_value(result)) { + goto store_result; + } + ASSERT(c_p->freason != BADMATCH || is_value(c_p->fvalue)); + goto find_func_info; + } + OpCase(i_plus_jId): { Eterm result; diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 2d57c8a218..e6448931eb 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2787,6 +2787,52 @@ gen_skip_bits2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, return op; } +static GenOp* +gen_increment(LoaderState* stp, GenOpArg Reg, GenOpArg Integer, + GenOpArg Live, GenOpArg Dst) +{ + GenOp* op; + + NEW_GENOP(stp, op); + op->op = genop_i_increment_4; + op->arity = 4; + op->next = NULL; + op->a[0] = Reg; + op->a[1].type = TAG_u; + op->a[1].val = Integer.val; + op->a[2] = Live; + op->a[3] = Dst; + return op; +} + +static GenOp* +gen_increment_from_minus(LoaderState* stp, GenOpArg Reg, GenOpArg Integer, + GenOpArg Live, GenOpArg Dst) +{ + GenOp* op; + + NEW_GENOP(stp, op); + op->op = genop_i_increment_4; + op->arity = 4; + op->next = NULL; + op->a[0] = Reg; + op->a[1].type = TAG_u; + op->a[1].val = -Integer.val; + op->a[2] = Live; + op->a[3] = Dst; + return op; +} + +/* + * Test whether the negation of the given number is small. + */ +static int +negation_is_small(LoaderState* stp, GenOpArg Int) +{ + return Int.type == TAG_i && IS_SSMALL(-Int.val); +} + + static int smp(LoaderState* stp) { diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 6db55e1402..029d7b512c 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1371,7 +1371,21 @@ apply I apply_last I P # -# New GCing arithmetic instructions. +# Optimize addition and subtraction of small literals using +# the i_increment/4 instruction (in bodies, not in guards). +# + +gc_bif2 p Live u$bif:erlang:splus/2 Int=i Reg=d Dst => \ + gen_increment(Reg, Int, Live, Dst) +gc_bif2 p Live u$bif:erlang:splus/2 Reg=d Int=i Dst => \ + gen_increment(Reg, Int, Live, Dst) + +gc_bif2 p Live u$bif:erlang:sminus/2 Reg=d Int=i Dst | \ + negation_is_small(Int) => \ + gen_increment_from_minus(Reg, Int, Live, Dst) + +# +# GCing arithmetic instructions. # gc_bif2 Fail I u$bif:erlang:splus/2 S1 S2 Dst=d => i_fetch S1 S2 | i_plus Fail I Dst @@ -1394,6 +1408,10 @@ gc_bif1 Fail I u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src I Dst gc_bif1 Fail I u$bif:erlang:sminus/1 Src Dst=d => i_fetch i Src | i_minus Fail I Dst gc_bif1 Fail I u$bif:erlang:splus/1 Src Dst=d => i_fetch i Src | i_plus Fail I Dst +i_increment r I I d +i_increment x I I d +i_increment y I I d + i_plus j I d i_minus j I d i_times j I d -- cgit v1.2.3 From 40dd4f87b3567d087bbcb071404b871af3601241 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 10 Dec 2010 06:07:06 +0100 Subject: Optimize and clean-up the exact equality/non-equality instructions The is_eq_exact/3 and is_ne_exact/3 instructions are commonly used with one immediate or literal operand. Introduce three new specialized instructions: i_is_eq_exact_literal/3 i_is_ne_exact_immed/3 i_is_ne_exact_literal/3 The i_is_ne_exact_literal/3 instruction is not very frequently used, but its existence is justified because we removed in a a previous commit the special instruction for matching bignums and we now use i_is_ne_exact_literal/3 instead. For consistency, rename the existing is_eq_immed/3 instruction to is_eq_exact_immed/3. While at it, remove the optimization of an is_eq/3 instruction with an immediate operand because that optimization is already done by the compiler. --- erts/emulator/beam/beam_emu.c | 47 +++++++++++++++++++++++++++++++++++++++++++ erts/emulator/beam/ops.tab | 46 ++++++++++++++++++++++++++++++------------ 2 files changed, 80 insertions(+), 13 deletions(-) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 741ba8fb93..b90613079a 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -738,6 +738,7 @@ extern int count_instructions; } while (0) #define EqualImmed(X, Y, Action) if (X != Y) { Action; } +#define NotEqualImmed(X, Y, Action) if (X == Y) { Action; } #define IsFloat(Src, Fail) if (is_not_float(Src)) { Fail; } @@ -1385,6 +1386,52 @@ void process_main(void) } Next(1); + { + Eterm is_eq_exact_lit_val; + + OpCase(i_is_eq_exact_literal_xfc): + is_eq_exact_lit_val = xb(Arg(0)); + I++; + goto do_is_eq_exact_literal; + + OpCase(i_is_eq_exact_literal_yfc): + is_eq_exact_lit_val = yb(Arg(0)); + I++; + goto do_is_eq_exact_literal; + + OpCase(i_is_eq_exact_literal_rfc): + is_eq_exact_lit_val = r(0); + + do_is_eq_exact_literal: + if (!eq(Arg(1), is_eq_exact_lit_val)) { + ClauseFail(); + } + Next(2); + } + + { + Eterm is_ne_exact_lit_val; + + OpCase(i_is_ne_exact_literal_xfc): + is_ne_exact_lit_val = xb(Arg(0)); + I++; + goto do_is_ne_exact_literal; + + OpCase(i_is_ne_exact_literal_yfc): + is_ne_exact_lit_val = yb(Arg(0)); + I++; + goto do_is_ne_exact_literal; + + OpCase(i_is_ne_exact_literal_rfc): + is_ne_exact_lit_val = r(0); + + do_is_ne_exact_literal: + if (eq(Arg(1), is_ne_exact_lit_val)) { + ClauseFail(); + } + Next(2); + } + OpCase(i_move_call_only_fcr): { r(0) = Arg(1); } diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 029d7b512c..9b3d1da018 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -339,31 +339,51 @@ i_wait_error_locked send # -# Comparisions. +# Optimized comparisons with one immediate/literal operand. # -is_eq_exact Lbl=f R=rxy C=ian => i_is_eq_immed Lbl R C -is_eq Lbl=f R=rxy C=an => i_is_eq_immed Lbl R C +is_eq_exact Lbl R=rxy C=ian => i_is_eq_exact_immed Lbl R C +is_eq_exact Lbl R=rxy C=q => i_is_eq_exact_literal R Lbl C + +is_ne_exact Lbl R=rxy C=ian => i_is_ne_exact_immed Lbl R C +is_ne_exact Lbl R=rxy C=q => i_is_ne_exact_literal R Lbl C + +%macro: i_is_eq_exact_immed EqualImmed -fail_action +i_is_eq_exact_immed f r c +i_is_eq_exact_immed f x c +i_is_eq_exact_immed f y c + +i_is_eq_exact_literal r f c +i_is_eq_exact_literal x f c +i_is_eq_exact_literal y f c + +%macro: i_is_ne_exact_immed NotEqualImmed -fail_action +i_is_ne_exact_immed f r c +i_is_ne_exact_immed f x c +i_is_ne_exact_immed f y c + +i_is_ne_exact_literal r f c +i_is_ne_exact_literal x f c +i_is_ne_exact_literal y f c + +# +# All other comparisons. +# + +is_eq_exact Lbl S1 S2 => i_fetch S1 S2 | i_is_eq_exact Lbl +is_ne_exact Lbl S1 S2 => i_fetch S1 S2 | i_is_ne_exact Lbl is_ge Lbl S1 S2 => i_fetch S1 S2 | i_is_ge Lbl is_lt Lbl S1 S2 => i_fetch S1 S2 | i_is_lt Lbl is_eq Lbl S1 S2 => i_fetch S1 S2 | i_is_eq Lbl is_ne Lbl S1 S2 => i_fetch S1 S2 | i_is_ne Lbl -is_eq_exact Lbl=f S1 S2 => i_fetch S1 S2 | i_is_eq_exact Lbl -is_ne_exact Lbl S1 S2 => i_fetch S1 S2 | i_is_ne_exact Lbl - +i_is_eq_exact f +i_is_ne_exact f i_is_lt f i_is_ge f i_is_eq f i_is_ne f -i_is_eq_exact f -i_is_ne_exact f - -%macro: i_is_eq_immed EqualImmed -fail_action -i_is_eq_immed f r c -i_is_eq_immed f x c -i_is_eq_immed f y c # # Putting things. -- cgit v1.2.3 From 51ddd2047c70bce67c8644d85f5d5de72842c320 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 10 Dec 2010 07:57:01 +0100 Subject: Combine a move + jump sequence into the move_jump instruction That will save one word and small amount of time for each occurrence. --- erts/emulator/beam/beam_emu.c | 5 +++++ erts/emulator/beam/ops.tab | 8 ++++++++ 2 files changed, 13 insertions(+) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index b90613079a..edb4d61ed2 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -688,6 +688,11 @@ extern int count_instructions; SET_I((BeamInstr *) CallDest); \ Dispatch(); +#define MoveJump(Src) \ + r(0) = (Src); \ + SET_I((BeamInstr *) Arg(0)); \ + Goto(*I); + #define GetList(Src, H, T) do { \ Eterm* tmp_ptr = list_val(Src); \ H = CAR(tmp_ptr); \ diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 9b3d1da018..ac63b352ef 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -279,6 +279,14 @@ system_limit j move R R => +move C=cxy r | jump Lbl => move_jump Lbl C + +%macro: move_jump MoveJump -nonext +move_jump f n +move_jump f c +move_jump f x +move_jump f y + move X1=x Y1=y | move X2=x Y2=y => move2 X1 Y1 X2 Y2 move Y1=y X1=x | move Y2=y X2=x => move2 Y1 X1 Y2 X2 move X1=x X2=x | move X3=x X4=x => move2 X1 X2 X3 X4 -- cgit v1.2.3 From d8a47b34744be64db10a458991bf340328af8f9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 16 Dec 2010 08:22:11 +0100 Subject: Introduce a few more variations of the move instructions Frequency counts show that move Const x(1) move Const x(2) are very common. --- erts/emulator/beam/beam_emu.c | 11 +++++++++++ erts/emulator/beam/ops.tab | 6 ++++++ 2 files changed, 17 insertions(+) (limited to 'erts/emulator/beam') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index edb4d61ed2..16741aa2d7 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1520,6 +1520,17 @@ void process_main(void) NextPF(1, next); } + OpCase(move_x1_c): { + x(1) = Arg(0); + Next(1); + } + + OpCase(move_x2_c): { + x(2) = Arg(0); + Next(1); + } + + OpCase(return): { SET_I(c_p->cp); /* diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index ac63b352ef..e861f97e7a 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -291,6 +291,12 @@ move X1=x Y1=y | move X2=x Y2=y => move2 X1 Y1 X2 Y2 move Y1=y X1=x | move Y2=y X2=x => move2 Y1 X1 Y2 X2 move X1=x X2=x | move X3=x X4=x => move2 X1 X2 X3 X4 +move C=aiq X=x==1 => move_x1 C +move C=aiq X=x==2 => move_x2 C + +move_x1 c +move_x2 c + %macro: move2 Move2 -pack move2 x y x y move2 y x y x -- cgit v1.2.3