From ec26a0c56cb6e28cc5a35ef72116275e5eeef823 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 1 Apr 2016 14:03:51 +0200 Subject: Refactor calls to transform_engine() We used to set last_op_next and last_op to NULL just in case. Setting last_op_next to causes a rescan of the instructions to find the last instruction in the chain, so we would want to avoid that unless really necessary. --- erts/emulator/beam/beam_load.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) (limited to 'erts/emulator/beam/beam_load.c') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 16cbdbffea..2f2b433999 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2028,11 +2028,7 @@ load_code(LoaderState* stp) ASSERT(arity == last_op->arity); do_transform: - if (stp->genop == NULL) { - last_op_next = NULL; - goto get_next_instr; - } - + ASSERT(stp->genop != NULL); if (gen_opc[stp->genop->op].transform != -1) { int need; tmp_op = stp->genop; @@ -2045,25 +2041,34 @@ load_code(LoaderState* stp) } switch (transform_engine(stp)) { case TE_FAIL: - last_op_next = NULL; - last_op = NULL; + /* + * No transformation found. stp->genop != NULL and + * last_op_next is still valid. Go ahead and load + * the instruction. + */ break; case TE_OK: + /* + * Some transformation was applied. last_op_next is + * no longer valid and stp->genop may be NULL. + * Try to transform again. + */ + if (stp->genop == NULL) { + last_op_next = &stp->genop; + goto get_next_instr; + } last_op_next = NULL; - last_op = NULL; goto do_transform; case TE_SHORT_WINDOW: - last_op_next = NULL; - last_op = NULL; + /* + * No transformation applied. stp->genop != NULL and + * last_op_next is still valid. Fetch a new instruction + * before trying the transformation again. + */ goto get_next_instr; } } - if (stp->genop == NULL) { - last_op_next = NULL; - goto get_next_instr; - } - /* * From the collected generic instruction, find the specific * instruction. @@ -2584,7 +2589,10 @@ load_code(LoaderState* stp) { GenOp* next = stp->genop->next; FREE_GENOP(stp, stp->genop); - stp->genop = next; + if ((stp->genop = next) == NULL) { + last_op_next = &stp->genop; + goto get_next_instr; + } goto do_transform; } } -- cgit v1.2.3 From 6d51b25958393d95dee32baafd708aa3909ddb5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Fri, 1 Apr 2016 16:07:37 +0200 Subject: Eliminate allocation of variables in transform_engine() When an instruction with a variable number operands (such as select_val) is seen of the left side of a transformation, the 'next_arg' instruction will allocate a buffer to fit all variables and all operands will be copied into the buffer. Very often, the 'commit' instruction will never be reached because of a test or predicate failing or because of a short window; in that case, the variable buffer will be deallocated. Note that originally there were only few instructions with a variable number of operands, but now common operations such as tuple building also have a variable number of operands. To avoid those frequent allocations and deallocations, modify the 'next_arg' instruction to only save a pointer to the first of the "rest" arguments. Also move the deallocation of the instructions on the left side from the 'commit' instruction to the 'end' instruction to ensure that 'store_rest_args' will still work. --- erts/emulator/beam/beam_load.c | 94 ++++++++++++------------------------------ 1 file changed, 27 insertions(+), 67 deletions(-) (limited to 'erts/emulator/beam/beam_load.c') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 2f2b433999..c6c35e74c9 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -4813,31 +4813,25 @@ transform_engine(LoaderState* st) Uint op; int ap; /* Current argument. */ Uint* restart; /* Where to restart if current match fails. */ - GenOpArg def_vars[TE_MAX_VARS]; /* Default buffer for variables. */ - GenOpArg* var = def_vars; - int num_vars = 0; + GenOpArg var[TE_MAX_VARS]; /* Buffer for variables. */ + GenOpArg* rest_args = NULL; + int num_rest_args = 0; int i; /* General index. */ Uint mask; GenOp* instr; + GenOp* first = st->genop; + GenOp* keep = NULL; Uint* pc; - int rval; static Uint restart_fail[1] = {TOP_fail}; - ASSERT(gen_opc[st->genop->op].transform != -1); - pc = op_transform + gen_opc[st->genop->op].transform; - restart = pc; + ASSERT(gen_opc[first->op].transform != -1); + restart = op_transform + gen_opc[first->op].transform; restart: - if (var != def_vars) { - erts_free(ERTS_ALC_T_LOADER_TMP, (void *) var); - var = def_vars; - } ASSERT(restart != NULL); pc = restart; ASSERT(*pc < NUM_TOPS); /* Valid instruction? */ - instr = st->genop; - -#define RETURN(r) rval = (r); goto do_return; + instr = first; #ifdef DEBUG restart = NULL; @@ -4855,7 +4849,7 @@ transform_engine(LoaderState* st) * We'll need at least one more instruction to decide whether * this combination matches or not. */ - RETURN(TE_SHORT_WINDOW); + return TE_SHORT_WINDOW; } if (*pc++ != instr->op) goto restart; @@ -5017,19 +5011,9 @@ transform_engine(LoaderState* st) #if defined(TOP_rest_args) case TOP_rest_args: { - int n = *pc++; int formal_arity = gen_opc[instr->op].arity; - int j = formal_arity; - - num_vars = n + (instr->arity - formal_arity); - var = erts_alloc(ERTS_ALC_T_LOADER_TMP, - num_vars * sizeof(GenOpArg)); - for (i = 0; i < n; i++) { - var[i] = def_vars[i]; - } - while (i < num_vars) { - var[i++] = instr->a[j++]; - } + num_rest_args = instr->arity - formal_arity; + rest_args = instr->a + formal_arity; } break; #endif @@ -5038,16 +5022,8 @@ transform_engine(LoaderState* st) break; case TOP_commit: instr = instr->next; /* The next_instr was optimized away. */ - - /* - * The left-hand side of this transformation matched. - * Delete all matched instructions. - */ - while (st->genop != instr) { - GenOp* next = st->genop->next; - FREE_GENOP(st, st->genop); - st->genop = next; - } + keep = instr; + st->genop = instr; #ifdef DEBUG instr = 0; #endif @@ -5077,22 +5053,19 @@ transform_engine(LoaderState* st) lastp = &((*lastp)->next); } - instr = instr->next; /* The next_instr was optimized away. */ - - /* - * The left-hand side of this transformation matched. - * Delete all matched instructions. - */ - while (st->genop != instr) { - GenOp* next = st->genop->next; - FREE_GENOP(st, st->genop); - st->genop = next; - } - *lastp = st->genop; + keep = instr->next; /* The next_instr was optimized away. */ + *lastp = keep; st->genop = new_instr; } - RETURN(TE_OK); + /* FALLTHROUGH */ #endif + case TOP_end: + while (first != keep) { + GenOp* next = first->next; + FREE_GENOP(st, first); + first = next; + } + return TE_OK; case TOP_new_instr: /* * Note that the instructions are generated in reverse order. @@ -5123,14 +5096,10 @@ transform_engine(LoaderState* st) #if defined(TOP_store_rest_args) case TOP_store_rest_args: { - int n = *pc++; - int num_extra = num_vars - n; - - ASSERT(n <= num_vars); - GENOP_ARITY(instr, instr->arity+num_extra); + GENOP_ARITY(instr, instr->arity+num_rest_args); memcpy(instr->a, instr->def_args, ap*sizeof(GenOpArg)); - memcpy(instr->a+ap, var+n, num_extra*sizeof(GenOpArg)); - ap += num_extra; + memcpy(instr->a+ap, rest_args, num_rest_args*sizeof(GenOpArg)); + ap += num_rest_args; } break; #endif @@ -5142,21 +5111,12 @@ transform_engine(LoaderState* st) case TOP_try_me_else_fail: restart = restart_fail; break; - case TOP_end: - RETURN(TE_OK); case TOP_fail: - RETURN(TE_FAIL); + return TE_FAIL; default: ASSERT(0); } } -#undef RETURN - - do_return: - if (var != def_vars) { - erts_free(ERTS_ALC_T_LOADER_TMP, (void *) var); - } - return rval; } static void -- cgit v1.2.3 From c6cabe0b76dda183d209498e1e4e13e3407dcf9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 7 Dec 2015 14:51:44 +0100 Subject: Simplify window management for the transformation engine Generic instructions have a min_window field. Its purpose is to avoid calling transform_engine() when there are too few instructions in the current "transformation window" for a transformation to succeed. Currently it does not do much good since the window size will be decremented by one before being used. The reason for the subtraction is probably that in some circumstances in the past, the loader could read past the end of the BEAM module while attempting to fetch instructions to increase the window size. Therefore, it would not be safe to just remove the subtraction by one. The simplest and safest solution seems to always ensure that there are always at least TWO instructions when calling transform_engine(). That will be safe, as long as a BEAM module is always finished with an int_code_end/0 that is not involved in any transformation. --- erts/emulator/beam/beam_load.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'erts/emulator/beam/beam_load.c') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index c6c35e74c9..5d03c98657 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2030,14 +2030,14 @@ load_code(LoaderState* stp) do_transform: ASSERT(stp->genop != NULL); if (gen_opc[stp->genop->op].transform != -1) { - int need; - tmp_op = stp->genop; - - for (need = gen_opc[stp->genop->op].min_window-1; need > 0; need--) { - if (tmp_op == NULL) { - goto get_next_instr; - } - tmp_op = tmp_op->next; + if (stp->genop->next == NULL) { + /* + * Simple heuristic: Most transformations requires + * at least two instructions, so make sure that + * there are. That will reduce the number of + * TE_SHORT_WINDOWs. + */ + goto get_next_instr; } switch (transform_engine(stp)) { case TE_FAIL: -- cgit v1.2.3 From e93a66110aa27a5b8228fb46a3459a6de0e626d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 7 Dec 2015 16:12:21 +0100 Subject: Introduce a 'rename' instruction Introduce a 'rename' instruction that can be used to optimize simple renaming with unchanged operands such as: get_tuple_element Reg P Dst => i_get_tuple_element Reg P Dst By allowing it to lower the arity of instruction, transformations such as the following can be handled: trim N Remaining => i_trim N All in all, currently 67 transformations can be optimized in this way, including some commonly used ones. --- erts/emulator/beam/beam_load.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'erts/emulator/beam/beam_load.c') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 5d03c98657..ad174664ae 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -5077,6 +5077,12 @@ transform_engine(LoaderState* st) instr->arity = gen_opc[op].arity; ap = 0; break; +#ifdef TOP_rename + case TOP_rename: + instr->op = op = *pc++; + instr->arity = gen_opc[op].arity; + return TE_OK; +#endif case TOP_store_type: i = *pc++; instr->a[ap].type = i; -- cgit v1.2.3 From 937f527054f13dd524588c064cd5d76e3cfd23eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 5 Apr 2016 12:56:57 +0200 Subject: Avoid rebuilding unchanged instructions In transformations such as: move S X0=x==0 | line Loc | call_ext Ar Func => \ line Loc | move S X0 | call_ext Ar Func we can avoid rebuilding the last instruction in the sequence by introducing a 'keep' instruction. Currently, there are only 13 transformations that are hit by this optimization, but most of them are frequently used. --- erts/emulator/beam/beam_load.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'erts/emulator/beam/beam_load.c') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index ad174664ae..bdb451a6fe 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -5028,7 +5028,16 @@ transform_engine(LoaderState* st) instr = 0; #endif break; - +#if defined(TOP_keep) + case TOP_keep: + /* Keep the current instruction unchanged. */ + keep = instr; + st->genop = instr; +#ifdef DEBUG + instr = 0; +#endif + break; +#endif #if defined(TOP_call_end) case TOP_call_end: { -- cgit v1.2.3 From 4f33597d52a0cef2e47b07578bc8a35a17c2f969 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 6 Apr 2016 07:02:36 +0200 Subject: Don't let the loader do the compiler's job Optimizations that are possible to do by the compiler should be done by the compiler and not by the loader. If the compiler has done its job correctly, attempting to do the two transformations only wastes time. --- erts/emulator/beam/beam_load.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'erts/emulator/beam/beam_load.c') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index bdb451a6fe..a98900460e 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2735,13 +2735,6 @@ mixed_types(LoaderState* stp, GenOpArg Size, GenOpArg* Rest) return 0; } -static int -same_label(LoaderState* stp, GenOpArg Target, GenOpArg Label) -{ - return Target.type = TAG_f && Label.type == TAG_u && - Target.val == Label.val; -} - static int is_killed_apply(LoaderState* stp, GenOpArg Reg, GenOpArg Live) { -- cgit v1.2.3