aboutsummaryrefslogtreecommitdiffstats
path: root/erts
diff options
context:
space:
mode:
authorJohn Högberg <[email protected]>2018-05-08 08:52:22 +0200
committerJohn Högberg <[email protected]>2018-09-28 11:31:57 +0200
commitfd6246c5191d07b80bc7100b470a37a338accecd (patch)
tree9cd0a6750aa0fa680fc214b6650006f34d2e3818 /erts
parentb1726b022ad260497a1edc1cceb94935a06804e5 (diff)
downloadotp-fd6246c5191d07b80bc7100b470a37a338accecd.tar.gz
otp-fd6246c5191d07b80bc7100b470a37a338accecd.tar.bz2
otp-fd6246c5191d07b80bc7100b470a37a338accecd.zip
Rewrite BSM optimizations in the new SSA-based intermediate format
This commit improves the bit-syntax match optimization pass, leveraging the new SSA intermediate format to perform much more aggressive optimizations. Some highlights: * Watch contexts can be reused even after being passed to a function or being used in a try block. * Sub-binaries are no longer eagerly extracted, making it far easier to keep "happy paths" free from binary creation. * Trivial wrapper functions no longer disable context reuse.
Diffstat (limited to 'erts')
-rw-r--r--erts/emulator/beam/bs_instrs.tab294
-rw-r--r--erts/emulator/beam/erl_bits.c36
-rw-r--r--erts/emulator/beam/erl_bits.h13
-rw-r--r--erts/emulator/beam/ops.tab32
4 files changed, 367 insertions, 8 deletions
diff --git a/erts/emulator/beam/bs_instrs.tab b/erts/emulator/beam/bs_instrs.tab
index 9fd70cbbb6..2dde70c2e1 100644
--- a/erts/emulator/beam/bs_instrs.tab
+++ b/erts/emulator/beam/bs_instrs.tab
@@ -728,17 +728,23 @@ bs_start_match.execute(Fail, Live, Slots, Dst) {
$FAIL($Fail);
}
header = *boxed_val(context);
- slots = $Slots;
+
+ /* Reserve a slot for the start position. */
+ slots = $Slots + 1;
live = $Live;
+
if (header_is_bin_matchstate(header)) {
ErlBinMatchState* ms = (ErlBinMatchState *) boxed_val(context);
Uint actual_slots = HEADER_NUM_SLOTS(header);
+
+ /* We're not compatible with contexts created by bs_start_match3. */
+ ASSERT(actual_slots >= 1);
+
ms->save_offset[0] = ms->mb.offset;
- if (actual_slots < slots) {
+ if (ERTS_UNLIKELY(actual_slots < slots)) {
ErlBinMatchState* expanded;
Uint live = $Live;
Uint wordsneeded = ERL_BIN_MATCHSTATE_SIZE(slots);
-
$GC_TEST_PRESERVE(wordsneeded, live, context);
ms = (ErlBinMatchState *) boxed_val(context);
expanded = (ErlBinMatchState *) HTOP;
@@ -746,9 +752,10 @@ bs_start_match.execute(Fail, Live, Slots, Dst) {
*HTOP = HEADER_BIN_MATCHSTATE(slots);
HTOP += wordsneeded;
HEAP_SPACE_VERIFIED(0);
+ context = make_matchstate(expanded);
$REFRESH_GEN_DEST();
- $Dst = make_matchstate(expanded);
}
+ $Dst = context;
} else if (is_binary_header(header)) {
Eterm result;
Uint wordsneeded = ERL_BIN_MATCHSTATE_SIZE(slots);
@@ -1038,10 +1045,289 @@ i_bs_match_string(Ctx, Fail, Bits, Ptr) {
i_bs_save2(Src, Slot) {
ErlBinMatchState* _ms = (ErlBinMatchState*) boxed_val((Eterm) $Src);
+ ASSERT(HEADER_NUM_SLOTS(_ms->thing_word) > $Slot);
_ms->save_offset[$Slot] = _ms->mb.offset;
}
i_bs_restore2(Src, Slot) {
ErlBinMatchState* _ms = (ErlBinMatchState*) boxed_val((Eterm) $Src);
+ ASSERT(HEADER_NUM_SLOTS(_ms->thing_word) > $Slot);
_ms->mb.offset = _ms->save_offset[$Slot];
}
+
+bs_get_tail(Src, Dst, Live) {
+ ErlBinMatchBuffer* mb;
+ Uint size, offs;
+ ErlSubBin* sb;
+ Eterm context;
+
+ context = $Src;
+
+ ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+
+ $GC_TEST_PRESERVE(ERL_SUB_BIN_SIZE, $Live, context);
+
+ mb = ms_matchbuffer(context);
+
+ offs = mb->offset;
+ size = mb->size - offs;
+
+ sb = (ErlSubBin *) HTOP;
+ HTOP += ERL_SUB_BIN_SIZE;
+
+ sb->thing_word = HEADER_SUB_BIN;
+ sb->size = BYTE_OFFSET(size);
+ sb->bitsize = BIT_OFFSET(size);
+ sb->offs = BYTE_OFFSET(offs);
+ sb->bitoffs = BIT_OFFSET(offs);
+ sb->is_writable = 0;
+ sb->orig = mb->orig;
+
+ $REFRESH_GEN_DEST();
+ $Dst = make_binary(sb);
+}
+
+
+%if ARCH_64
+
+i_bs_start_match3_gp(Src, Live, Fail, Dst, Pos) {
+ Eterm context, header;
+ Uint position, live;
+
+ context = $Src;
+ live = $Live;
+
+ if (!is_boxed(context)) {
+ $FAIL($Fail);
+ }
+
+ header = *boxed_val(context);
+
+ if (header_is_bin_matchstate(header)) {
+ ErlBinMatchBuffer *mb;
+
+ ASSERT(HEADER_NUM_SLOTS(header) == 0);
+
+ mb = ms_matchbuffer(context);
+ position = mb->offset;
+
+ $Dst = context;
+ } else if (is_binary_header(header)) {
+ ErlBinMatchState *ms;
+
+ $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(0), live, context);
+ HEAP_TOP(c_p) = HTOP;
+#ifdef DEBUG
+ c_p->stop = E; /* Needed for checking in HeapOnlyAlloc(). */
+#endif
+ ms = erts_bs_start_match_3(c_p, context);
+ HTOP = HEAP_TOP(c_p);
+ HEAP_SPACE_VERIFIED(0);
+
+ if (ms == NULL) {
+ $FAIL($Fail);
+ }
+
+ $REFRESH_GEN_DEST();
+ $Dst = make_matchstate(ms);
+ position = ms->mb.offset;
+ } else {
+ $FAIL($Fail);
+ }
+
+ ASSERT(IS_USMALL(0, position));
+ $Pos = make_small(position);
+}
+
+i_bs_start_match3(Src, Live, Fail, Dst) {
+ Eterm context, header;
+ Uint live;
+
+ context = $Src;
+ live = $Live;
+
+ if (!is_boxed(context)) {
+ $FAIL($Fail);
+ }
+
+ header = *boxed_val(context);
+
+ if (header_is_bin_matchstate(header)) {
+ ASSERT(HEADER_NUM_SLOTS(header) == 0);
+ $Dst = context;
+ } else if (is_binary_header(header)) {
+ ErlBinMatchState *ms;
+
+ $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(0), live, context);
+ HEAP_TOP(c_p) = HTOP;
+#ifdef DEBUG
+ c_p->stop = E; /* Needed for checking in HeapOnlyAlloc(). */
+#endif
+ ms = erts_bs_start_match_3(c_p, context);
+ HTOP = HEAP_TOP(c_p);
+ HEAP_SPACE_VERIFIED(0);
+
+ if (ms == NULL) {
+ $FAIL($Fail);
+ }
+
+ $REFRESH_GEN_DEST();
+ $Dst = make_matchstate(ms);
+ } else {
+ $FAIL($Fail);
+ }
+}
+
+bs_set_position(Ctx, Pos) {
+ ErlBinMatchBuffer* mb;
+ Eterm context;
+
+ context = $Ctx;
+ ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+
+ mb = ms_matchbuffer(context);
+ mb->offset = unsigned_val($Pos);
+}
+
+i_bs_get_position(Ctx, Dst) {
+ ErlBinMatchBuffer* mb;
+ Eterm context;
+
+ context = $Ctx;
+ ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+
+ mb = ms_matchbuffer(context);
+ $Dst = make_small(mb->offset);
+}
+
+%else
+
+#
+# Unlike their 64-bit counterparts, the 32-bit position instructions operate on
+# an offset from the "base position" of the context because storing raw
+# positions would lead to the creation of far too many bigints.
+#
+# When a match context is reused we check whether its position fits into an
+# immediate, and create a new match context if it does not. This means we only
+# have to allocate stuff roughly once every 16MB rather than every time we
+# match at a position beyond 16MB.
+#
+
+bs_set_position(Ctx, Pos) {
+ Eterm context, position;
+ ErlBinMatchState *ms;
+
+ context = $Ctx;
+ position = $Pos;
+
+ ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+ ms = (ErlBinMatchState*)boxed_val(context);
+
+ if (ERTS_LIKELY(is_small(position))) {
+ ms->mb.offset = ms->save_offset[0] + unsigned_val(position);
+ } else {
+ ASSERT(is_big(position));
+ ms->mb.offset = ms->save_offset[0] + *BIG_V(big_val(position));
+ }
+}
+
+bs_get_position(Ctx, Dst, Live) {
+ ErlBinMatchState *ms;
+ Eterm context;
+ Uint position;
+
+ context = $Ctx;
+
+ ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+ ms = (ErlBinMatchState*)boxed_val(context);
+
+ position = ms->mb.offset - ms->save_offset[0];
+
+ if (ERTS_LIKELY(IS_USMALL(0, position))) {
+ $Dst = make_small(position);
+ } else {
+ Eterm *hp;
+
+ $GC_TEST_PRESERVE(BIG_UINT_HEAP_SIZE, $Live, context);
+
+ hp = HTOP;
+ HTOP += BIG_UINT_HEAP_SIZE;
+
+ *hp = make_pos_bignum_header(1);
+ BIG_DIGIT(hp, 0) = position;
+
+ $REFRESH_GEN_DEST();
+ $Dst = make_big(hp);
+ }
+}
+
+i_bs_start_match3(Src, Live, Fail, Dst) {
+ Eterm context, header;
+ Uint live;
+
+ context = $Src;
+ live = $Live;
+
+ if (!is_boxed(context)) {
+ $FAIL($Fail);
+ }
+
+ header = *boxed_val(context);
+
+ if (header_is_bin_matchstate(header)) {
+ ErlBinMatchState *current_ms;
+ Uint position;
+
+ ASSERT(HEADER_NUM_SLOTS(header) == 1);
+
+ current_ms = (ErlBinMatchState*)boxed_val(context);
+ position = current_ms->mb.offset - current_ms->save_offset[0];
+
+ if (ERTS_LIKELY(IS_USMALL(0, position))) {
+ $Dst = context;
+ } else {
+ ErlBinMatchState *new_ms;
+
+ $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(1), live, context);
+ current_ms = (ErlBinMatchState*)boxed_val(context);
+
+ new_ms = (ErlBinMatchState*)HTOP;
+ HTOP += ERL_BIN_MATCHSTATE_SIZE(1);
+
+ new_ms->thing_word = HEADER_BIN_MATCHSTATE(1);
+ new_ms->save_offset[0] = current_ms->mb.offset;
+ new_ms->mb = current_ms->mb;
+
+ $REFRESH_GEN_DEST();
+ $Dst = make_matchstate(new_ms);
+ }
+ } else if (is_binary_header(header)) {
+ Eterm result;
+
+ $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(1), live, context);
+ HEAP_TOP(c_p) = HTOP;
+
+#ifdef DEBUG
+ c_p->stop = E; /* Needed for checking in HeapOnlyAlloc(). */
+#endif
+
+ /* We intentionally use erts_bs_start_match_2 so that we can use
+ * save_offset as a base for all saved positions on this context,
+ * allowing us to avoid bigints for much longer. */
+ result = erts_bs_start_match_2(c_p, context, 1);
+
+ HTOP = HEAP_TOP(c_p);
+ HEAP_SPACE_VERIFIED(0);
+
+ if (is_non_value(result)) {
+ $FAIL($Fail);
+ }
+
+ $REFRESH_GEN_DEST();
+ $Dst = result;
+ } else {
+ $FAIL($Fail);
+ }
+}
+
+%endif
diff --git a/erts/emulator/beam/erl_bits.c b/erts/emulator/beam/erl_bits.c
index 3a16913473..e82c776e70 100644
--- a/erts/emulator/beam/erl_bits.c
+++ b/erts/emulator/beam/erl_bits.c
@@ -144,6 +144,42 @@ erts_bs_start_match_2(Process *p, Eterm Binary, Uint Max)
return make_matchstate(ms);
}
+ErlBinMatchState *erts_bs_start_match_3(Process *p, Eterm Binary)
+{
+ Eterm Orig;
+ Uint offs;
+ Uint* hp;
+ Uint NeededSize;
+ ErlBinMatchState *ms;
+ Uint bitoffs;
+ Uint bitsize;
+ Uint total_bin_size;
+ ProcBin* pb;
+
+ ASSERT(is_binary(Binary));
+ total_bin_size = binary_size(Binary);
+ if ((total_bin_size >> (8*sizeof(Uint)-3)) != 0) {
+ return NULL;
+ }
+
+ NeededSize = ERL_BIN_MATCHSTATE_SIZE(0);
+ hp = HeapOnlyAlloc(p, NeededSize);
+ ms = (ErlBinMatchState *) hp;
+ ERTS_GET_REAL_BIN(Binary, Orig, offs, bitoffs, bitsize);
+ pb = (ProcBin *) boxed_val(Orig);
+ if (pb->thing_word == HEADER_PROC_BIN && pb->flags != 0) {
+ erts_emasculate_writable_binary(pb);
+ }
+
+ ms->thing_word = HEADER_BIN_MATCHSTATE(0);
+ (ms->mb).orig = Orig;
+ (ms->mb).base = binary_bytes(Orig);
+ (ms->mb).offset = 8 * offs + bitoffs;
+ (ms->mb).size = total_bin_size * 8 + (ms->mb).offset + bitsize;
+
+ return ms;
+}
+
#ifdef DEBUG
# define CHECK_MATCH_BUFFER(MB) check_match_buffer(MB)
diff --git a/erts/emulator/beam/erl_bits.h b/erts/emulator/beam/erl_bits.h
index 7beef5cfda..50d353e1fa 100644
--- a/erts/emulator/beam/erl_bits.h
+++ b/erts/emulator/beam/erl_bits.h
@@ -73,12 +73,16 @@ struct erl_bits_state {
typedef struct erl_bin_match_struct{
Eterm thing_word;
ErlBinMatchBuffer mb; /* Present match buffer */
- Eterm save_offset[1]; /* Saved offsets */
+ Eterm save_offset[1]; /* Saved offsets, only valid for contexts
+ * created through bs_start_match2. */
} ErlBinMatchState;
-#define ERL_BIN_MATCHSTATE_SIZE(_Max) ((sizeof(ErlBinMatchState) + (_Max)*sizeof(Eterm))/sizeof(Eterm))
-#define HEADER_BIN_MATCHSTATE(_Max) _make_header(ERL_BIN_MATCHSTATE_SIZE((_Max))-1, _TAG_HEADER_BIN_MATCHSTATE)
-#define HEADER_NUM_SLOTS(hdr) (header_arity(hdr)-sizeof(ErlBinMatchState)/sizeof(Eterm)+1)
+#define ERL_BIN_MATCHSTATE_SIZE(_Max) \
+ ((offsetof(ErlBinMatchState, save_offset) + (_Max)*sizeof(Eterm))/sizeof(Eterm))
+#define HEADER_BIN_MATCHSTATE(_Max) \
+ _make_header(ERL_BIN_MATCHSTATE_SIZE((_Max)) - 1, _TAG_HEADER_BIN_MATCHSTATE)
+#define HEADER_NUM_SLOTS(hdr) \
+ (header_arity(hdr) - (offsetof(ErlBinMatchState, save_offset) / sizeof(Eterm)) + 1)
#define make_matchstate(_Ms) make_boxed((Eterm*)(_Ms))
#define ms_matchbuffer(_Ms) &(((ErlBinMatchState*) boxed_val(_Ms))->mb)
@@ -144,6 +148,7 @@ void erts_bits_destroy_state(ERL_BITS_PROTO_0);
*/
Eterm erts_bs_start_match_2(Process *p, Eterm Bin, Uint Max);
+ErlBinMatchState *erts_bs_start_match_3(Process *p, Eterm Bin);
Eterm erts_bs_get_integer_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb);
Eterm erts_bs_get_binary_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb);
Eterm erts_bs_get_float_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb);
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index c3e71ff77a..349034e8ac 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -1167,6 +1167,38 @@ bs_context_to_binary Y=y => move Y x | bs_context_to_binary x
bs_context_to_binary x
+# Gets a bitstring from the tail of a context.
+bs_get_tail xy d t
+
+# New bs_start_match variant for contexts with external position storage.
+#
+# bs_get/set_position is used to save positions into registers instead of
+# "slots" in the context itself, which lets us continue matching even after
+# we've passed it off to another function.
+
+%if ARCH_64
+bs_start_match3 Fail Bin Live Ctx | bs_get_position Ctx Pos=x Ignored => \
+ i_bs_start_match3_gp Bin Live Fail Ctx Pos
+i_bs_start_match3_gp xy t f d x
+%endif
+
+bs_start_match3 Fail=f ica Live Dst => jump Fail
+bs_start_match3 Fail Bin Live Dst => i_bs_start_match3 Bin Live Fail Dst
+
+i_bs_start_match3 xy t f d
+
+# Match context position instructions. 64-bit assumes that all positions can
+# fit into an unsigned small.
+
+%if ARCH_64
+ bs_get_position Src Dst Live => i_bs_get_position Src Dst
+ i_bs_get_position xy xy
+ bs_set_position xy xy
+%else
+ bs_get_position xy d t?
+ bs_set_position xy xy
+%endif
+
#
# Utf8/utf16/utf32 support. (R12B-5)
#