diff options
author | John Högberg <[email protected]> | 2018-05-08 08:52:22 +0200 |
---|---|---|
committer | John Högberg <[email protected]> | 2018-09-28 11:31:57 +0200 |
commit | fd6246c5191d07b80bc7100b470a37a338accecd (patch) | |
tree | 9cd0a6750aa0fa680fc214b6650006f34d2e3818 /erts | |
parent | b1726b022ad260497a1edc1cceb94935a06804e5 (diff) | |
download | otp-fd6246c5191d07b80bc7100b470a37a338accecd.tar.gz otp-fd6246c5191d07b80bc7100b470a37a338accecd.tar.bz2 otp-fd6246c5191d07b80bc7100b470a37a338accecd.zip |
Rewrite BSM optimizations in the new SSA-based intermediate format
This commit improves the bit-syntax match optimization pass,
leveraging the new SSA intermediate format to perform much more
aggressive optimizations. Some highlights:
* Watch contexts can be reused even after being passed to a
function or being used in a try block.
* Sub-binaries are no longer eagerly extracted, making it far
easier to keep "happy paths" free from binary creation.
* Trivial wrapper functions no longer disable context reuse.
Diffstat (limited to 'erts')
-rw-r--r-- | erts/emulator/beam/bs_instrs.tab | 294 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bits.c | 36 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bits.h | 13 | ||||
-rw-r--r-- | erts/emulator/beam/ops.tab | 32 |
4 files changed, 367 insertions, 8 deletions
diff --git a/erts/emulator/beam/bs_instrs.tab b/erts/emulator/beam/bs_instrs.tab index 9fd70cbbb6..2dde70c2e1 100644 --- a/erts/emulator/beam/bs_instrs.tab +++ b/erts/emulator/beam/bs_instrs.tab @@ -728,17 +728,23 @@ bs_start_match.execute(Fail, Live, Slots, Dst) { $FAIL($Fail); } header = *boxed_val(context); - slots = $Slots; + + /* Reserve a slot for the start position. */ + slots = $Slots + 1; live = $Live; + if (header_is_bin_matchstate(header)) { ErlBinMatchState* ms = (ErlBinMatchState *) boxed_val(context); Uint actual_slots = HEADER_NUM_SLOTS(header); + + /* We're not compatible with contexts created by bs_start_match3. */ + ASSERT(actual_slots >= 1); + ms->save_offset[0] = ms->mb.offset; - if (actual_slots < slots) { + if (ERTS_UNLIKELY(actual_slots < slots)) { ErlBinMatchState* expanded; Uint live = $Live; Uint wordsneeded = ERL_BIN_MATCHSTATE_SIZE(slots); - $GC_TEST_PRESERVE(wordsneeded, live, context); ms = (ErlBinMatchState *) boxed_val(context); expanded = (ErlBinMatchState *) HTOP; @@ -746,9 +752,10 @@ bs_start_match.execute(Fail, Live, Slots, Dst) { *HTOP = HEADER_BIN_MATCHSTATE(slots); HTOP += wordsneeded; HEAP_SPACE_VERIFIED(0); + context = make_matchstate(expanded); $REFRESH_GEN_DEST(); - $Dst = make_matchstate(expanded); } + $Dst = context; } else if (is_binary_header(header)) { Eterm result; Uint wordsneeded = ERL_BIN_MATCHSTATE_SIZE(slots); @@ -1038,10 +1045,289 @@ i_bs_match_string(Ctx, Fail, Bits, Ptr) { i_bs_save2(Src, Slot) { ErlBinMatchState* _ms = (ErlBinMatchState*) boxed_val((Eterm) $Src); + ASSERT(HEADER_NUM_SLOTS(_ms->thing_word) > $Slot); _ms->save_offset[$Slot] = _ms->mb.offset; } i_bs_restore2(Src, Slot) { ErlBinMatchState* _ms = (ErlBinMatchState*) boxed_val((Eterm) $Src); + ASSERT(HEADER_NUM_SLOTS(_ms->thing_word) > $Slot); _ms->mb.offset = _ms->save_offset[$Slot]; } + +bs_get_tail(Src, Dst, Live) { + ErlBinMatchBuffer* mb; + Uint size, offs; + ErlSubBin* sb; + Eterm context; + + context = $Src; + + ASSERT(header_is_bin_matchstate(*boxed_val(context))); + + $GC_TEST_PRESERVE(ERL_SUB_BIN_SIZE, $Live, context); + + mb = ms_matchbuffer(context); + + offs = mb->offset; + size = mb->size - offs; + + sb = (ErlSubBin *) HTOP; + HTOP += ERL_SUB_BIN_SIZE; + + sb->thing_word = HEADER_SUB_BIN; + sb->size = BYTE_OFFSET(size); + sb->bitsize = BIT_OFFSET(size); + sb->offs = BYTE_OFFSET(offs); + sb->bitoffs = BIT_OFFSET(offs); + sb->is_writable = 0; + sb->orig = mb->orig; + + $REFRESH_GEN_DEST(); + $Dst = make_binary(sb); +} + + +%if ARCH_64 + +i_bs_start_match3_gp(Src, Live, Fail, Dst, Pos) { + Eterm context, header; + Uint position, live; + + context = $Src; + live = $Live; + + if (!is_boxed(context)) { + $FAIL($Fail); + } + + header = *boxed_val(context); + + if (header_is_bin_matchstate(header)) { + ErlBinMatchBuffer *mb; + + ASSERT(HEADER_NUM_SLOTS(header) == 0); + + mb = ms_matchbuffer(context); + position = mb->offset; + + $Dst = context; + } else if (is_binary_header(header)) { + ErlBinMatchState *ms; + + $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(0), live, context); + HEAP_TOP(c_p) = HTOP; +#ifdef DEBUG + c_p->stop = E; /* Needed for checking in HeapOnlyAlloc(). */ +#endif + ms = erts_bs_start_match_3(c_p, context); + HTOP = HEAP_TOP(c_p); + HEAP_SPACE_VERIFIED(0); + + if (ms == NULL) { + $FAIL($Fail); + } + + $REFRESH_GEN_DEST(); + $Dst = make_matchstate(ms); + position = ms->mb.offset; + } else { + $FAIL($Fail); + } + + ASSERT(IS_USMALL(0, position)); + $Pos = make_small(position); +} + +i_bs_start_match3(Src, Live, Fail, Dst) { + Eterm context, header; + Uint live; + + context = $Src; + live = $Live; + + if (!is_boxed(context)) { + $FAIL($Fail); + } + + header = *boxed_val(context); + + if (header_is_bin_matchstate(header)) { + ASSERT(HEADER_NUM_SLOTS(header) == 0); + $Dst = context; + } else if (is_binary_header(header)) { + ErlBinMatchState *ms; + + $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(0), live, context); + HEAP_TOP(c_p) = HTOP; +#ifdef DEBUG + c_p->stop = E; /* Needed for checking in HeapOnlyAlloc(). */ +#endif + ms = erts_bs_start_match_3(c_p, context); + HTOP = HEAP_TOP(c_p); + HEAP_SPACE_VERIFIED(0); + + if (ms == NULL) { + $FAIL($Fail); + } + + $REFRESH_GEN_DEST(); + $Dst = make_matchstate(ms); + } else { + $FAIL($Fail); + } +} + +bs_set_position(Ctx, Pos) { + ErlBinMatchBuffer* mb; + Eterm context; + + context = $Ctx; + ASSERT(header_is_bin_matchstate(*boxed_val(context))); + + mb = ms_matchbuffer(context); + mb->offset = unsigned_val($Pos); +} + +i_bs_get_position(Ctx, Dst) { + ErlBinMatchBuffer* mb; + Eterm context; + + context = $Ctx; + ASSERT(header_is_bin_matchstate(*boxed_val(context))); + + mb = ms_matchbuffer(context); + $Dst = make_small(mb->offset); +} + +%else + +# +# Unlike their 64-bit counterparts, the 32-bit position instructions operate on +# an offset from the "base position" of the context because storing raw +# positions would lead to the creation of far too many bigints. +# +# When a match context is reused we check whether its position fits into an +# immediate, and create a new match context if it does not. This means we only +# have to allocate stuff roughly once every 16MB rather than every time we +# match at a position beyond 16MB. +# + +bs_set_position(Ctx, Pos) { + Eterm context, position; + ErlBinMatchState *ms; + + context = $Ctx; + position = $Pos; + + ASSERT(header_is_bin_matchstate(*boxed_val(context))); + ms = (ErlBinMatchState*)boxed_val(context); + + if (ERTS_LIKELY(is_small(position))) { + ms->mb.offset = ms->save_offset[0] + unsigned_val(position); + } else { + ASSERT(is_big(position)); + ms->mb.offset = ms->save_offset[0] + *BIG_V(big_val(position)); + } +} + +bs_get_position(Ctx, Dst, Live) { + ErlBinMatchState *ms; + Eterm context; + Uint position; + + context = $Ctx; + + ASSERT(header_is_bin_matchstate(*boxed_val(context))); + ms = (ErlBinMatchState*)boxed_val(context); + + position = ms->mb.offset - ms->save_offset[0]; + + if (ERTS_LIKELY(IS_USMALL(0, position))) { + $Dst = make_small(position); + } else { + Eterm *hp; + + $GC_TEST_PRESERVE(BIG_UINT_HEAP_SIZE, $Live, context); + + hp = HTOP; + HTOP += BIG_UINT_HEAP_SIZE; + + *hp = make_pos_bignum_header(1); + BIG_DIGIT(hp, 0) = position; + + $REFRESH_GEN_DEST(); + $Dst = make_big(hp); + } +} + +i_bs_start_match3(Src, Live, Fail, Dst) { + Eterm context, header; + Uint live; + + context = $Src; + live = $Live; + + if (!is_boxed(context)) { + $FAIL($Fail); + } + + header = *boxed_val(context); + + if (header_is_bin_matchstate(header)) { + ErlBinMatchState *current_ms; + Uint position; + + ASSERT(HEADER_NUM_SLOTS(header) == 1); + + current_ms = (ErlBinMatchState*)boxed_val(context); + position = current_ms->mb.offset - current_ms->save_offset[0]; + + if (ERTS_LIKELY(IS_USMALL(0, position))) { + $Dst = context; + } else { + ErlBinMatchState *new_ms; + + $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(1), live, context); + current_ms = (ErlBinMatchState*)boxed_val(context); + + new_ms = (ErlBinMatchState*)HTOP; + HTOP += ERL_BIN_MATCHSTATE_SIZE(1); + + new_ms->thing_word = HEADER_BIN_MATCHSTATE(1); + new_ms->save_offset[0] = current_ms->mb.offset; + new_ms->mb = current_ms->mb; + + $REFRESH_GEN_DEST(); + $Dst = make_matchstate(new_ms); + } + } else if (is_binary_header(header)) { + Eterm result; + + $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(1), live, context); + HEAP_TOP(c_p) = HTOP; + +#ifdef DEBUG + c_p->stop = E; /* Needed for checking in HeapOnlyAlloc(). */ +#endif + + /* We intentionally use erts_bs_start_match_2 so that we can use + * save_offset as a base for all saved positions on this context, + * allowing us to avoid bigints for much longer. */ + result = erts_bs_start_match_2(c_p, context, 1); + + HTOP = HEAP_TOP(c_p); + HEAP_SPACE_VERIFIED(0); + + if (is_non_value(result)) { + $FAIL($Fail); + } + + $REFRESH_GEN_DEST(); + $Dst = result; + } else { + $FAIL($Fail); + } +} + +%endif diff --git a/erts/emulator/beam/erl_bits.c b/erts/emulator/beam/erl_bits.c index 3a16913473..e82c776e70 100644 --- a/erts/emulator/beam/erl_bits.c +++ b/erts/emulator/beam/erl_bits.c @@ -144,6 +144,42 @@ erts_bs_start_match_2(Process *p, Eterm Binary, Uint Max) return make_matchstate(ms); } +ErlBinMatchState *erts_bs_start_match_3(Process *p, Eterm Binary) +{ + Eterm Orig; + Uint offs; + Uint* hp; + Uint NeededSize; + ErlBinMatchState *ms; + Uint bitoffs; + Uint bitsize; + Uint total_bin_size; + ProcBin* pb; + + ASSERT(is_binary(Binary)); + total_bin_size = binary_size(Binary); + if ((total_bin_size >> (8*sizeof(Uint)-3)) != 0) { + return NULL; + } + + NeededSize = ERL_BIN_MATCHSTATE_SIZE(0); + hp = HeapOnlyAlloc(p, NeededSize); + ms = (ErlBinMatchState *) hp; + ERTS_GET_REAL_BIN(Binary, Orig, offs, bitoffs, bitsize); + pb = (ProcBin *) boxed_val(Orig); + if (pb->thing_word == HEADER_PROC_BIN && pb->flags != 0) { + erts_emasculate_writable_binary(pb); + } + + ms->thing_word = HEADER_BIN_MATCHSTATE(0); + (ms->mb).orig = Orig; + (ms->mb).base = binary_bytes(Orig); + (ms->mb).offset = 8 * offs + bitoffs; + (ms->mb).size = total_bin_size * 8 + (ms->mb).offset + bitsize; + + return ms; +} + #ifdef DEBUG # define CHECK_MATCH_BUFFER(MB) check_match_buffer(MB) diff --git a/erts/emulator/beam/erl_bits.h b/erts/emulator/beam/erl_bits.h index 7beef5cfda..50d353e1fa 100644 --- a/erts/emulator/beam/erl_bits.h +++ b/erts/emulator/beam/erl_bits.h @@ -73,12 +73,16 @@ struct erl_bits_state { typedef struct erl_bin_match_struct{ Eterm thing_word; ErlBinMatchBuffer mb; /* Present match buffer */ - Eterm save_offset[1]; /* Saved offsets */ + Eterm save_offset[1]; /* Saved offsets, only valid for contexts + * created through bs_start_match2. */ } ErlBinMatchState; -#define ERL_BIN_MATCHSTATE_SIZE(_Max) ((sizeof(ErlBinMatchState) + (_Max)*sizeof(Eterm))/sizeof(Eterm)) -#define HEADER_BIN_MATCHSTATE(_Max) _make_header(ERL_BIN_MATCHSTATE_SIZE((_Max))-1, _TAG_HEADER_BIN_MATCHSTATE) -#define HEADER_NUM_SLOTS(hdr) (header_arity(hdr)-sizeof(ErlBinMatchState)/sizeof(Eterm)+1) +#define ERL_BIN_MATCHSTATE_SIZE(_Max) \ + ((offsetof(ErlBinMatchState, save_offset) + (_Max)*sizeof(Eterm))/sizeof(Eterm)) +#define HEADER_BIN_MATCHSTATE(_Max) \ + _make_header(ERL_BIN_MATCHSTATE_SIZE((_Max)) - 1, _TAG_HEADER_BIN_MATCHSTATE) +#define HEADER_NUM_SLOTS(hdr) \ + (header_arity(hdr) - (offsetof(ErlBinMatchState, save_offset) / sizeof(Eterm)) + 1) #define make_matchstate(_Ms) make_boxed((Eterm*)(_Ms)) #define ms_matchbuffer(_Ms) &(((ErlBinMatchState*) boxed_val(_Ms))->mb) @@ -144,6 +148,7 @@ void erts_bits_destroy_state(ERL_BITS_PROTO_0); */ Eterm erts_bs_start_match_2(Process *p, Eterm Bin, Uint Max); +ErlBinMatchState *erts_bs_start_match_3(Process *p, Eterm Bin); Eterm erts_bs_get_integer_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb); Eterm erts_bs_get_binary_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb); Eterm erts_bs_get_float_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb); diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index c3e71ff77a..349034e8ac 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1167,6 +1167,38 @@ bs_context_to_binary Y=y => move Y x | bs_context_to_binary x bs_context_to_binary x +# Gets a bitstring from the tail of a context. +bs_get_tail xy d t + +# New bs_start_match variant for contexts with external position storage. +# +# bs_get/set_position is used to save positions into registers instead of +# "slots" in the context itself, which lets us continue matching even after +# we've passed it off to another function. + +%if ARCH_64 +bs_start_match3 Fail Bin Live Ctx | bs_get_position Ctx Pos=x Ignored => \ + i_bs_start_match3_gp Bin Live Fail Ctx Pos +i_bs_start_match3_gp xy t f d x +%endif + +bs_start_match3 Fail=f ica Live Dst => jump Fail +bs_start_match3 Fail Bin Live Dst => i_bs_start_match3 Bin Live Fail Dst + +i_bs_start_match3 xy t f d + +# Match context position instructions. 64-bit assumes that all positions can +# fit into an unsigned small. + +%if ARCH_64 + bs_get_position Src Dst Live => i_bs_get_position Src Dst + i_bs_get_position xy xy + bs_set_position xy xy +%else + bs_get_position xy d t? + bs_set_position xy xy +%endif + # # Utf8/utf16/utf32 support. (R12B-5) # |