diff options
author | Björn Gustavsson <[email protected]> | 2019-03-04 06:43:06 +0100 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2019-03-06 15:42:43 +0100 |
commit | 82431098ec653fc98ab5a9114609f6c8a5a646e0 (patch) | |
tree | 99fcd3171233cce7a76ebab54510ea59c449c10d /erts | |
parent | 0a65a7b517ced326b0b0754497a2285821df60f8 (diff) | |
download | otp-82431098ec653fc98ab5a9114609f6c8a5a646e0.tar.gz otp-82431098ec653fc98ab5a9114609f6c8a5a646e0.tar.bz2 otp-82431098ec653fc98ab5a9114609f6c8a5a646e0.zip |
Reduce code size for binary matching instructions
The new compiler required adding support for Y registers for all
binary matching instructions. That was (intentionally) done in a
naive way that simply duplicated the entire body of each
instruction.
Now it's time to be less naive. Rewrite the binary matching
instructions using micro instructions. Because some of the binary
instructions are huge, that will significantly decrease the size of
process_main().
When compiling with clang, a huge process_main() would mess up
profile-guided optimization, resulting in a significant performance
degradation. On my Mac, profile-guided optimization would decrease
the estone benchmark score by 100K estones (about 20 percent). This commit
gives me back the lost estones.
Diffstat (limited to 'erts')
-rw-r--r-- | erts/emulator/beam/beam_load.c | 36 | ||||
-rw-r--r-- | erts/emulator/beam/bs_instrs.tab | 230 | ||||
-rw-r--r-- | erts/emulator/beam/ops.tab | 12 |
3 files changed, 210 insertions, 68 deletions
diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index f4eeb54a1b..5aaba78de4 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -3295,11 +3295,11 @@ gen_get_integer2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live, } else { op->op = genop_i_bs_get_integer_6; op->arity = 6; - op->a[0] = Fail; - op->a[1] = Live; - op->a[2].type = TAG_u; - op->a[2].val = (Unit.val << 3) | Flags.val; - op->a[3] = Ms; + op->a[0] = Ms; + op->a[1] = Fail; + op->a[2] = Live; + op->a[3].type = TAG_u; + op->a[3].val = (Unit.val << 3) | Flags.val; op->a[4] = Size; op->a[5] = Dst; op->next = NULL; @@ -3332,8 +3332,8 @@ gen_get_binary2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live, } else { op->op = genop_i_bs_get_binary_all2_5; op->arity = 5; - op->a[0] = Fail; - op->a[1] = Ms; + op->a[0] = Ms; + op->a[1] = Fail; op->a[2] = Live; op->a[3] = Unit; op->a[4] = Dst; @@ -3341,8 +3341,8 @@ gen_get_binary2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live, } else if (Size.type == TAG_i) { op->op = genop_i_bs_get_binary_imm2_6; op->arity = 6; - op->a[0] = Fail; - op->a[1] = Ms; + op->a[0] = Ms; + op->a[1] = Fail; op->a[2] = Live; op->a[3].type = TAG_u; if (!safe_mul(Size.val, Unit.val, &op->a[3].val)) { @@ -3362,8 +3362,8 @@ gen_get_binary2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live, } else { op->op = genop_i_bs_get_binary_imm2_6; op->arity = 6; - op->a[0] = Fail; - op->a[1] = Ms; + op->a[0] = Ms; + op->a[1] = Fail; op->a[2] = Live; op->a[3].type = TAG_u; if (!safe_mul(bigval, Unit.val, &op->a[3].val)) { @@ -3375,8 +3375,8 @@ gen_get_binary2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live, } else { op->op = genop_i_bs_get_binary2_6; op->arity = 6; - op->a[0] = Fail; - op->a[1] = Ms; + op->a[0] = Ms; + op->a[1] = Fail; op->a[2] = Live; op->a[3] = Size; op->a[4].type = TAG_u; @@ -3541,8 +3541,8 @@ gen_get_float2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, GenOpArg Live, 
NATIVE_ENDIAN(Flags); op->op = genop_i_bs_get_float2_6; op->arity = 6; - op->a[0] = Fail; - op->a[1] = Ms; + op->a[0] = Ms; + op->a[1] = Fail; op->a[2] = Live; op->a[3] = Size; op->a[4].type = TAG_u; @@ -3601,9 +3601,9 @@ gen_skip_bits2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms, } else { op->op = genop_i_bs_skip_bits2_4; op->arity = 4; - op->a[0] = Fail; - op->a[1] = Ms; - op->a[2] = Size; + op->a[0] = Ms; + op->a[1] = Size; + op->a[2] = Fail; op->a[3] = Unit; } op->next = NULL; diff --git a/erts/emulator/beam/bs_instrs.tab b/erts/emulator/beam/bs_instrs.tab index 2dde70c2e1..714f1d49ce 100644 --- a/erts/emulator/beam/bs_instrs.tab +++ b/erts/emulator/beam/bs_instrs.tab @@ -90,12 +90,22 @@ TEST_BIN_VHEAP(VNh, Nh, Live) { HEAP_SPACE_VERIFIED(need); } -i_bs_get_binary_all2(Fail, Ms, Live, Unit, Dst) { +i_bs_get_binary_all2 := i_bs_get_binary_all2.fetch.execute; + +i_bs_get_binary_all2.head() { + Eterm context; +} + +i_bs_get_binary_all2.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_binary_all2.execute(Fail, Live, Unit, Dst) { ErlBinMatchBuffer *_mb; Eterm _result; - $GC_TEST(0, ERL_SUB_BIN_SIZE, $Live); - _mb = ms_matchbuffer($Ms); + $GC_TEST_PRESERVE(ERL_SUB_BIN_SIZE, $Live, context); + _mb = ms_matchbuffer(context); if (((_mb->size - _mb->offset) % $Unit) == 0) { LIGHT_SWAPOUT; _result = erts_bs_get_binary_all_2(c_p, _mb); @@ -109,14 +119,23 @@ i_bs_get_binary_all2(Fail, Ms, Live, Unit, Dst) { $FAIL($Fail); } } +i_bs_get_binary2 := i_bs_get_binary2.fetch.execute; + +i_bs_get_binary2.head() { + Eterm context; +} -i_bs_get_binary2(Fail, Ms, Live, Sz, Flags, Dst) { +i_bs_get_binary2.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_binary2.execute(Fail, Live, Sz, Flags, Dst) { ErlBinMatchBuffer *_mb; Eterm _result; Uint _size; $BS_GET_FIELD_SIZE($Sz, (($Flags) >> 3), $FAIL($Fail), _size); - $GC_TEST(0, ERL_SUB_BIN_SIZE, $Live); - _mb = ms_matchbuffer($Ms); + $GC_TEST_PRESERVE(ERL_SUB_BIN_SIZE, $Live, context); + _mb = ms_matchbuffer(context); LIGHT_SWAPOUT; _result = 
erts_bs_get_binary_2(c_p, _size, $Flags, _mb); LIGHT_SWAPIN; @@ -129,11 +148,22 @@ i_bs_get_binary2(Fail, Ms, Live, Sz, Flags, Dst) { } } -i_bs_get_binary_imm2(Fail, Ms, Live, Sz, Flags, Dst) { +i_bs_get_binary_imm2 := i_bs_get_binary_imm2.fetch.execute; + +i_bs_get_binary_imm2.head() { + Eterm context; +} + +i_bs_get_binary_imm2.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_binary_imm2.execute(Fail, Live, Sz, Flags, Dst) { ErlBinMatchBuffer *_mb; Eterm _result; - $GC_TEST(0, heap_bin_size(ERL_ONHEAP_BIN_LIMIT), $Live); - _mb = ms_matchbuffer($Ms); + $GC_TEST_PRESERVE(heap_bin_size(ERL_ONHEAP_BIN_LIMIT), + $Live, context); + _mb = ms_matchbuffer(context); LIGHT_SWAPOUT; _result = erts_bs_get_binary_2(c_p, $Sz, $Flags, _mb); LIGHT_SWAPIN; @@ -145,8 +175,17 @@ i_bs_get_binary_imm2(Fail, Ms, Live, Sz, Flags, Dst) { $Dst = _result; } } +i_bs_get_float2 := i_bs_get_float2.fetch.execute; + +i_bs_get_float2.head() { + Eterm context; +} + +i_bs_get_float2.fetch(Ctx) { + context = $Ctx; +} -i_bs_get_float2(Fail, Ms, Live, Sz, Flags, Dst) { +i_bs_get_float2.execute(Fail, Live, Sz, Flags, Dst) { ErlBinMatchBuffer *_mb; Eterm _result; Sint _size; @@ -155,8 +194,8 @@ i_bs_get_float2(Fail, Ms, Live, Sz, Flags, Dst) { $FAIL($Fail); } _size *= (($Flags) >> 3); - $GC_TEST(0, FLOAT_SIZE_OBJECT, $Live); - _mb = ms_matchbuffer($Ms); + $GC_TEST_PRESERVE(FLOAT_SIZE_OBJECT, $Live, context); + _mb = ms_matchbuffer(context); LIGHT_SWAPOUT; _result = erts_bs_get_float_2(c_p, _size, ($Flags), _mb); LIGHT_SWAPIN; @@ -169,13 +208,24 @@ i_bs_get_float2(Fail, Ms, Live, Sz, Flags, Dst) { } } -i_bs_skip_bits2(Fail, Ms, Bits, Unit) { +i_bs_skip_bits2 := i_bs_skip_bits2.fetch.execute; + +i_bs_skip_bits2.head() { + Eterm context, bits; +} + +i_bs_skip_bits2.fetch(Ctx, Bits) { + context = $Ctx; + bits = $Bits; +} + +i_bs_skip_bits2.execute(Fail, Unit) { ErlBinMatchBuffer *_mb; size_t new_offset; Uint _size; - _mb = ms_matchbuffer($Ms); - $BS_GET_FIELD_SIZE($Bits, $Unit, $FAIL($Fail), _size); + _mb = 
ms_matchbuffer(context); + $BS_GET_FIELD_SIZE(bits, $Unit, $FAIL($Fail), _size); new_offset = _mb->offset + _size; if (new_offset <= _mb->size) { _mb->offset = new_offset; @@ -809,9 +859,19 @@ bs_test_unit8(Fail, Ctx) { } } -i_bs_get_integer_8(Ctx, Fail, Dst) { +i_bs_get_integer_8 := i_bs_get_integer_8.fetch.execute; + +i_bs_get_integer_8.head() { + Eterm context; +} + +i_bs_get_integer_8.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_integer_8.execute(Fail, Dst) { Eterm _result; - ErlBinMatchBuffer* _mb = ms_matchbuffer($Ctx); + ErlBinMatchBuffer* _mb = ms_matchbuffer(context); if (_mb->size - _mb->offset < 8) { $FAIL($Fail); @@ -825,9 +885,19 @@ i_bs_get_integer_8(Ctx, Fail, Dst) { $Dst = _result; } -i_bs_get_integer_16(Ctx, Fail, Dst) { +i_bs_get_integer_16 := i_bs_get_integer_16.fetch.execute; + +i_bs_get_integer_16.head() { + Eterm context; +} + +i_bs_get_integer_16.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_integer_16.execute(Fail, Dst) { Eterm _result; - ErlBinMatchBuffer* _mb = ms_matchbuffer($Ctx); + ErlBinMatchBuffer* _mb = ms_matchbuffer(context); if (_mb->size - _mb->offset < 16) { $FAIL($Fail); @@ -842,9 +912,19 @@ i_bs_get_integer_16(Ctx, Fail, Dst) { } %if ARCH_64 -i_bs_get_integer_32(Ctx, Fail, Dst) { +i_bs_get_integer_32 := i_bs_get_integer_32.fetch.execute; + +i_bs_get_integer_32.head() { + Eterm context; +} + +i_bs_get_integer_32.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_integer_32.execute(Fail, Dst) { Uint32 _integer; - ErlBinMatchBuffer* _mb = ms_matchbuffer($Ctx); + ErlBinMatchBuffer* _mb = ms_matchbuffer(context); if (_mb->size - _mb->offset < 32) { $FAIL($Fail); @@ -894,15 +974,23 @@ bs_get_integer.execute(Fail, Flags, Dst) { $Dst = result; } -i_bs_get_integer(Fail, Live, FlagsAndUnit, Ms, Sz, Dst) { +i_bs_get_integer := i_bs_get_integer.fetch.execute; + +i_bs_get_integer.head() { + Eterm context; +} + +i_bs_get_integer.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_integer.execute(Fail, Live, FlagsAndUnit, Sz, Dst) { Uint flags; Uint 
size; - Eterm ms; ErlBinMatchBuffer* mb; Eterm result; flags = $FlagsAndUnit; - ms = $Ms; $BS_GET_FIELD_SIZE($Sz, (flags >> 3), $FAIL($Fail), size); if (size >= SMALL_BITS) { Uint wordsneeded; @@ -913,15 +1001,15 @@ i_bs_get_integer(Fail, Live, FlagsAndUnit, Ms, Sz, Dst) { * Remember to re-acquire the matchbuffer after gc. */ - mb = ms_matchbuffer(ms); + mb = ms_matchbuffer(context); if (mb->size - mb->offset < size) { $FAIL($Fail); } wordsneeded = 1+WSIZE(NBYTES((Uint) size)); - $GC_TEST_PRESERVE(wordsneeded, $Live, ms); + $GC_TEST_PRESERVE(wordsneeded, $Live, context); $REFRESH_GEN_DEST(); } - mb = ms_matchbuffer(ms); + mb = ms_matchbuffer(context); LIGHT_SWAPOUT; result = erts_bs_get_integer_2(c_p, size, flags, mb); LIGHT_SWAPIN; @@ -932,9 +1020,19 @@ i_bs_get_integer(Fail, Live, FlagsAndUnit, Ms, Sz, Dst) { $Dst = result; } -i_bs_get_utf8(Ctx, Fail, Dst) { +i_bs_get_utf8 := i_bs_get_utf8.fetch.execute; + +i_bs_get_utf8.head() { + Eterm context; +} + +i_bs_get_utf8.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_utf8.execute(Fail, Dst) { Eterm result; - ErlBinMatchBuffer* mb = ms_matchbuffer($Ctx); + ErlBinMatchBuffer* mb = ms_matchbuffer(context); if (mb->size - mb->offset < 8) { $FAIL($Fail); @@ -957,8 +1055,18 @@ i_bs_get_utf8(Ctx, Fail, Dst) { $Dst = result; } -i_bs_get_utf16(Ctx, Fail, Flags, Dst) { - ErlBinMatchBuffer* mb = ms_matchbuffer($Ctx); +i_bs_get_utf16 := i_bs_get_utf16.fetch.execute; + +i_bs_get_utf16.head() { + Eterm context; +} + +i_bs_get_utf16.fetch(Ctx) { + context = $Ctx; +} + +i_bs_get_utf16.execute(Fail, Flags, Dst) { + ErlBinMatchBuffer* mb = ms_matchbuffer(context); Eterm result = erts_bs_get_utf16(mb, $Flags); if (is_non_value(result)) { @@ -1055,13 +1163,20 @@ i_bs_restore2(Src, Slot) { _ms->mb.offset = _ms->save_offset[$Slot]; } -bs_get_tail(Src, Dst, Live) { - ErlBinMatchBuffer* mb; - Uint size, offs; - ErlSubBin* sb; +bs_get_tail := bs_get_tail.fetch.execute; + +bs_get_tail.head() { Eterm context; +} +bs_get_tail.fetch(Src) { context 
= $Src; +} + +bs_get_tail.execute(Dst, Live) { + ErlBinMatchBuffer* mb; + Uint size, offs; + ErlSubBin* sb; ASSERT(header_is_bin_matchstate(*boxed_val(context))); @@ -1090,11 +1205,20 @@ bs_get_tail(Src, Dst, Live) { %if ARCH_64 -i_bs_start_match3_gp(Src, Live, Fail, Dst, Pos) { - Eterm context, header; - Uint position, live; +i_bs_start_match3_gp := i_bs_start_match3_gp.fetch.execute; +i_bs_start_match3_gp.head() { + Eterm context; +} + +i_bs_start_match3_gp.fetch(Src) { context = $Src; +} + +i_bs_start_match3_gp.execute(Live, Fail, Dst, Pos) { + Eterm header; + Uint position, live; + live = $Live; if (!is_boxed(context)) { @@ -1139,11 +1263,20 @@ i_bs_start_match3_gp(Src, Live, Fail, Dst, Pos) { $Pos = make_small(position); } -i_bs_start_match3(Src, Live, Fail, Dst) { - Eterm context, header; - Uint live; +i_bs_start_match3 := i_bs_start_match3.fetch.execute; + +i_bs_start_match3.head() { + Eterm context; +} +i_bs_start_match3.fetch(Src) { context = $Src; +} + +i_bs_start_match3.execute(Live, Fail, Dst) { + Eterm header; + Uint live; + live = $Live; if (!is_boxed(context)) { @@ -1261,11 +1394,20 @@ bs_get_position(Ctx, Dst, Live) { } } -i_bs_start_match3(Src, Live, Fail, Dst) { - Eterm context, header; - Uint live; +i_bs_start_match3 := i_bs_start_match3.fetch.execute; +i_bs_start_match3.head() { + Eterm context; +} + +i_bs_start_match3.fetch(Src) { context = $Src; +} + +i_bs_start_match3.execute(Live, Fail, Dst) { + Eterm header; + Uint live; + live = $Live; if (!is_boxed(context)) { diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 969d2728a5..2f00b82c1e 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1160,7 +1160,7 @@ i_bs_get_integer_imm Ms Bits Live Fail Flags Y=y => \ i_bs_get_integer_small_imm xy W f? t x i_bs_get_integer_imm xy W t f? t x -i_bs_get_integer f? t t xy s d +i_bs_get_integer xy f? t t s d i_bs_get_integer_8 xy f? d i_bs_get_integer_16 xy f? d @@ -1172,9 +1172,9 @@ i_bs_get_integer_32 xy f? 
d bs_get_binary2 Fail=f Ms=xy Live=u Sz=sq Unit=u Flags=u Dst=d => \ gen_get_binary2(Fail, Ms, Live, Sz, Unit, Flags, Dst) -i_bs_get_binary_imm2 f? xy t W t d -i_bs_get_binary2 f xy t? s t d -i_bs_get_binary_all2 f? xy t t d +i_bs_get_binary_imm2 xy f? t W t d +i_bs_get_binary2 xy f t? s t d +i_bs_get_binary_all2 xy f? t t d i_bs_get_binary_all_reuse xy f? t # Fetching float from binaries. @@ -1183,7 +1183,7 @@ bs_get_float2 Fail=f Ms=xy Live=u Sz=s Unit=u Flags=u Dst=d => \ bs_get_float2 Fail=f Ms=x Live=u Sz=q Unit=u Flags=u Dst=d => jump Fail -i_bs_get_float2 f? xy t s t d +i_bs_get_float2 xy f? t s t d # Miscellanous @@ -1191,7 +1191,7 @@ bs_skip_bits2 Fail=f Ms=xy Sz=sq Unit=u Flags=u => \ gen_skip_bits2(Fail, Ms, Sz, Unit, Flags) i_bs_skip_bits_imm2 f? xy W -i_bs_skip_bits2 f? xy xy t +i_bs_skip_bits2 xy xy f? t i_bs_skip_bits_all2 f? xy t bs_test_tail2 Fail=f Ms=xy Bits=u==0 => bs_test_zero_tail2 Fail Ms |