From 824a481d5f38c748cd6efcb83cba0b8d26b88768 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 11 Nov 2010 08:23:55 +0100 Subject: Optimize creation of tuples Combine the put_tuple/2 and all following put/1 instructions to one i_put_tuple/2 instruction. In general, that will reduce the number of instruction words by 50 percent. Measurements seem to indicate that the speed is about the same. --- erts/emulator/beam/beam_debug.c | 28 ++++++++++++++++ erts/emulator/beam/beam_emu.c | 42 ++++++++++++++++++----- erts/emulator/beam/beam_load.c | 74 +++++++++++++++++++++++++++++++++++++++++ erts/emulator/beam/ops.tab | 39 ++++++++-------------- 4 files changed, 149 insertions(+), 34 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c index 4fa9d60bca..6c16c24d0d 100644 --- a/erts/emulator/beam/beam_debug.c +++ b/erts/emulator/beam/beam_debug.c @@ -330,6 +330,7 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) BeamInstr packed = 0; /* Accumulator for packed operations. */ BeamInstr args[8]; /* Arguments for this instruction. */ BeamInstr* ap; /* Pointer to arguments. */ + BeamInstr* unpacked; /* Unpacked arguments */ start_prog = opc[op].pack; @@ -551,6 +552,7 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) * Print more information about certain instructions. */ + unpacked = ap; ap = addr + size; switch (op) { case op_i_select_val_sfI: @@ -598,6 +600,32 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr) } } break; + case op_i_put_tuple_rI: + case op_i_put_tuple_xI: + case op_i_put_tuple_yI: + { + int n = unpacked[-1]; + + while (n > 0) { + if (!is_header(ap[0])) { + erts_print(to, to_arg, " %T", (Eterm) ap[0]); + } else { + switch ((ap[0] >> 2) & 0x03) { + case R_REG_DEF: + erts_print(to, to_arg, " x(0)"); + break; + case X_REG_DEF: + erts_print(to, to_arg, " x(%d)", ap[0] >> 4); + break; + case Y_REG_DEF: + erts_print(to, to_arg, " y(%d)", ap[0] >> 4); + break; + } + } + ap++, size++, n--; + } + } + break; } erts_print(to, to_arg, "\n"); diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 82925fda2c..e36ac1f1e6 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -525,6 +525,11 @@ extern int count_instructions; SWAPIN; \ } while (0) +#define PutTuple(Dst, Arity) \ + do { \ + Dst = make_tuple(HTOP); \ + pt_arity = (Arity); \ + } while (0) /* * Check that we haven't used the reductions and jump to function pointed to by @@ -732,15 +737,6 @@ extern int count_instructions; (Dest) = (* (Eterm *) EXPAND_POINTER(tmp_arg1)); \ } while (0) -#define PutTuple(Arity, Src, Dest) \ - ASSERT(is_arity_value(Arity)); \ - Dest = make_tuple(HTOP); \ - HTOP[0] = (Arity); \ - HTOP[1] = (Src); \ - HTOP += 2 - -#define Put(Word) *HTOP++ = (Word) - #define EqualImmed(X, Y, Action) if (X != Y) { Action; } #define IsFloat(Src, Fail) if (is_not_float(Src)) { Fail; } @@ -1155,6 +1151,8 @@ void process_main(void) Uint temp_bits; /* Temporary used by BsSkipBits2 & BsGetInteger2 */ + Eterm pt_arity; /* Used by do_put_tuple */ + ERL_BITS_DECLARE_STATEP; /* Has to be last declaration */ @@ -1924,6 +1922,32 @@ void process_main(void) Goto(*I); } + do_put_tuple: { + Eterm* hp = HTOP; + + *hp++ = make_arityval(pt_arity); + + do { + Eterm term = *I++; + switch (term & _TAG_IMMED1_MASK) { + case (R_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER: + *hp++ = r(0); + break; + case (X_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER: + *hp++ = x(term >> _TAG_IMMED1_SIZE); + break; + case (Y_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER: + *hp++ = y(term >> _TAG_IMMED1_SIZE); + break; + default: + *hp++ = term; + break; + } + } while (--pt_arity != 0); + HTOP = hp; + Goto(*I); + } + /* * All guards with zero arguments have special instructions: * self/0 diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index bc6186751e..a476e439ca 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2015,6 +2015,30 @@ load_code(LoaderState* stp) stp->labels[tmp_op->a[arg].val].patches = ci; ci++; break; + case TAG_r: + CodeNeed(1); + code[ci++] = (R_REG_DEF << _TAG_PRIMARY_SIZE) | + TAG_PRIMARY_HEADER; + break; + case TAG_x: + CodeNeed(1); + code[ci++] = (tmp_op->a[arg].val << _TAG_IMMED1_SIZE) | + (X_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER; + break; + case TAG_y: + CodeNeed(1); + code[ci++] = (tmp_op->a[arg].val << _TAG_IMMED1_SIZE) | + (Y_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER; + break; + case TAG_n: + CodeNeed(1); + code[ci++] = NIL; + break; + case TAG_q: + CodeNeed(1); + new_literal_patch(stp, ci); + code[ci++] = tmp_op->a[arg].val; + break; default: LoadError1(stp, "unsupported primitive type '%c'", tag_to_letter[tmp_op->a[arg].type]); @@ -3440,6 +3464,56 @@ gen_guard_bif3(LoaderState* stp, GenOpArg Fail, GenOpArg Live, GenOpArg Bif, return op; } +static GenOp* +tuple_append_put5(LoaderState* stp, GenOpArg Arity, GenOpArg Dst, + GenOpArg* Puts, GenOpArg S1, GenOpArg S2, GenOpArg S3, + GenOpArg S4, GenOpArg S5) +{ + GenOp* op; + int arity = Arity.val; /* Arity of tuple, not the instruction */ + int i; + + NEW_GENOP(stp, op); + op->next = NULL; + GENOP_ARITY(op, arity+2+5); + op->op = genop_i_put_tuple_2; + op->a[0] = Dst; + op->a[1].type = TAG_u; + op->a[1].val = arity + 5; + for (i = 0; i < arity; i++) { + op->a[i+2] = Puts[i]; + } + op->a[arity+2] = S1; + op->a[arity+3] = S2; + op->a[arity+4] = S3; + op->a[arity+5] = S4; + op->a[arity+6] = S5; + return op; +} + +static GenOp* +tuple_append_put(LoaderState* stp, GenOpArg Arity, GenOpArg Dst, + GenOpArg* Puts, GenOpArg S) +{ + GenOp* op; + int arity = Arity.val; /* Arity of tuple, not the instruction */ + int i; + + NEW_GENOP(stp, op); + op->next = NULL; + GENOP_ARITY(op, arity+2+1); + op->op = genop_i_put_tuple_2; + op->a[0] = Dst; + op->a[1].type = TAG_u; + op->a[1].val = arity + 1; + for (i = 0; i < arity; i++) { + op->a[i+2] = Puts[i]; + } + op->a[arity+2] = S; + return op; +} + + /* * Freeze the code in memory, move the string table into place, diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 4546f056a9..f629cc80b6 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -345,25 +345,21 @@ i_is_eq_immed f y c # Putting things. # -put_tuple Arity Dst | put V => i_put_tuple Arity V Dst - -%macro: i_put_tuple PutTuple -pack -i_put_tuple A x x -i_put_tuple A y x -i_put_tuple A r x -i_put_tuple A n x -i_put_tuple A c x -i_put_tuple A x y -i_put_tuple A x r -i_put_tuple A y r -i_put_tuple A n r -i_put_tuple A c r +put_tuple Arity Dst => i_put_tuple Dst u -%cold -i_put_tuple A r y -i_put_tuple A y y -i_put_tuple A c y -%hot +i_put_tuple Dst Arity Puts=* | put S1 | put S2 | \ + put S3 | put S4 | put S5 => \ + tuple_append_put5(Arity, Dst, Puts, S1, S2, S3, S4, S5) + +i_put_tuple Dst Arity Puts=* | put S => \ + tuple_append_put(Arity, Dst, Puts, S) + +i_put_tuple/2 + +%macro:i_put_tuple PutTuple -pack -goto:do_put_tuple +i_put_tuple r I +i_put_tuple x I +i_put_tuple y I %macro:put_list PutList -pack -gen_dest @@ -416,13 +412,6 @@ put_list x r r put_list s s d %hot -%macro: put Put -put x -put r -put y -put c -put n - %macro: i_fetch FetchArgs -pack i_fetch c c i_fetch c r -- cgit v1.2.3