aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/beam_load.c
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2019-03-04 06:43:06 +0100
committerBjörn Gustavsson <[email protected]>2019-03-06 15:42:43 +0100
commit82431098ec653fc98ab5a9114609f6c8a5a646e0 (patch)
tree99fcd3171233cce7a76ebab54510ea59c449c10d /erts/emulator/beam/beam_load.c
parent0a65a7b517ced326b0b0754497a2285821df60f8 (diff)
downloadotp-82431098ec653fc98ab5a9114609f6c8a5a646e0.tar.gz
otp-82431098ec653fc98ab5a9114609f6c8a5a646e0.tar.bz2
otp-82431098ec653fc98ab5a9114609f6c8a5a646e0.zip
Reduce code size for binary matching instructions
The new compiler required adding support for Y register for all binary matching instructions. That was (intentionally) done in a naive way that simplicated duplicated the entire body of each instruction. Now it's time to be less naive. Rewrite the binary matching instructions using micro instructions. Because some of the binary instructions are huge, that will significantly decrease the size of process_main(). When compiling with clang, a huge process_main() would mess up profile-guide optimization resulting in a significant performance degradation. On my Mac, profile-guide optimzation would decrease the estone benchmark by 100K estones (about 20 percent). This commit gives me back the lost estones.
Diffstat (limited to 'erts/emulator/beam/beam_load.c')
-rw-r--r--erts/emulator/beam/beam_load.c36
1 files changed, 18 insertions, 18 deletions
diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index f4eeb54a1b..5aaba78de4 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -3295,11 +3295,11 @@ gen_get_integer2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live,
} else {
op->op = genop_i_bs_get_integer_6;
op->arity = 6;
- op->a[0] = Fail;
- op->a[1] = Live;
- op->a[2].type = TAG_u;
- op->a[2].val = (Unit.val << 3) | Flags.val;
- op->a[3] = Ms;
+ op->a[0] = Ms;
+ op->a[1] = Fail;
+ op->a[2] = Live;
+ op->a[3].type = TAG_u;
+ op->a[3].val = (Unit.val << 3) | Flags.val;
op->a[4] = Size;
op->a[5] = Dst;
op->next = NULL;
@@ -3332,8 +3332,8 @@ gen_get_binary2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live,
} else {
op->op = genop_i_bs_get_binary_all2_5;
op->arity = 5;
- op->a[0] = Fail;
- op->a[1] = Ms;
+ op->a[0] = Ms;
+ op->a[1] = Fail;
op->a[2] = Live;
op->a[3] = Unit;
op->a[4] = Dst;
@@ -3341,8 +3341,8 @@ gen_get_binary2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live,
} else if (Size.type == TAG_i) {
op->op = genop_i_bs_get_binary_imm2_6;
op->arity = 6;
- op->a[0] = Fail;
- op->a[1] = Ms;
+ op->a[0] = Ms;
+ op->a[1] = Fail;
op->a[2] = Live;
op->a[3].type = TAG_u;
if (!safe_mul(Size.val, Unit.val, &op->a[3].val)) {
@@ -3362,8 +3362,8 @@ gen_get_binary2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live,
} else {
op->op = genop_i_bs_get_binary_imm2_6;
op->arity = 6;
- op->a[0] = Fail;
- op->a[1] = Ms;
+ op->a[0] = Ms;
+ op->a[1] = Fail;
op->a[2] = Live;
op->a[3].type = TAG_u;
if (!safe_mul(bigval, Unit.val, &op->a[3].val)) {
@@ -3375,8 +3375,8 @@ gen_get_binary2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live,
} else {
op->op = genop_i_bs_get_binary2_6;
op->arity = 6;
- op->a[0] = Fail;
- op->a[1] = Ms;
+ op->a[0] = Ms;
+ op->a[1] = Fail;
op->a[2] = Live;
op->a[3] = Size;
op->a[4].type = TAG_u;
@@ -3541,8 +3541,8 @@ gen_get_float2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live,
NATIVE_ENDIAN(Flags);
op->op = genop_i_bs_get_float2_6;
op->arity = 6;
- op->a[0] = Fail;
- op->a[1] = Ms;
+ op->a[0] = Ms;
+ op->a[1] = Fail;
op->a[2] = Live;
op->a[3] = Size;
op->a[4].type = TAG_u;
@@ -3601,9 +3601,9 @@ gen_skip_bits2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms,
} else {
op->op = genop_i_bs_skip_bits2_4;
op->arity = 4;
- op->a[0] = Fail;
- op->a[1] = Ms;
- op->a[2] = Size;
+ op->a[0] = Ms;
+ op->a[1] = Size;
+ op->a[2] = Fail;
op->a[3] = Unit;
}
op->next = NULL;