aboutsummaryrefslogtreecommitdiffstats
path: root/erts
diff options
context:
space:
mode:
authorJohn Högberg <[email protected]>2018-09-28 14:08:23 +0200
committerGitHub <[email protected]>2018-09-28 14:08:23 +0200
commit7d941c529dc9db016af30cdace2df089d1648dbf (patch)
treece04f9922931e927ca22d75a2cd0fd4d0d0328eb /erts
parent08ef38b2c9f84ed118e693bff38efa69fc2c7eb8 (diff)
parentafa36d2081927c46a4c3ddceb40276fc7756bb51 (diff)
downloadotp-7d941c529dc9db016af30cdace2df089d1648dbf.tar.gz
otp-7d941c529dc9db016af30cdace2df089d1648dbf.tar.bz2
otp-7d941c529dc9db016af30cdace2df089d1648dbf.zip
Merge pull request #1958 from jhogberg/john/compiler/ssa-bsm-opt
Rewrite BSM optimizations in the new SSA-based intermediate format
Diffstat (limited to 'erts')
-rw-r--r--erts/emulator/beam/beam_load.c1
-rw-r--r--erts/emulator/beam/bs_instrs.tab309
-rw-r--r--erts/emulator/beam/erl_bits.c36
-rw-r--r--erts/emulator/beam/erl_bits.h13
-rw-r--r--erts/emulator/beam/ops.tab104
5 files changed, 416 insertions, 47 deletions
diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index 50cbb37f3e..c28a5c57e6 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -2868,6 +2868,7 @@ load_code(LoaderState* stp)
break;
case op_bs_put_string_WW:
case op_i_bs_match_string_xfWW:
+ case op_i_bs_match_string_yfWW:
new_string_patch(stp, ci-1);
break;
diff --git a/erts/emulator/beam/bs_instrs.tab b/erts/emulator/beam/bs_instrs.tab
index 61eb02a7a2..2dde70c2e1 100644
--- a/erts/emulator/beam/bs_instrs.tab
+++ b/erts/emulator/beam/bs_instrs.tab
@@ -102,6 +102,7 @@ i_bs_get_binary_all2(Fail, Ms, Live, Unit, Dst) {
LIGHT_SWAPIN;
HEAP_SPACE_VERIFIED(0);
ASSERT(is_value(_result));
+ $REFRESH_GEN_DEST();
$Dst = _result;
} else {
HEAP_SPACE_VERIFIED(0);
@@ -123,6 +124,7 @@ i_bs_get_binary2(Fail, Ms, Live, Sz, Flags, Dst) {
if (is_non_value(_result)) {
$FAIL($Fail);
} else {
+ $REFRESH_GEN_DEST();
$Dst = _result;
}
}
@@ -139,6 +141,7 @@ i_bs_get_binary_imm2(Fail, Ms, Live, Sz, Flags, Dst) {
if (is_non_value(_result)) {
$FAIL($Fail);
} else {
+ $REFRESH_GEN_DEST();
$Dst = _result;
}
}
@@ -161,6 +164,7 @@ i_bs_get_float2(Fail, Ms, Live, Sz, Flags, Dst) {
if (is_non_value(_result)) {
$FAIL($Fail);
} else {
+ $REFRESH_GEN_DEST();
$Dst = _result;
}
}
@@ -724,26 +728,34 @@ bs_start_match.execute(Fail, Live, Slots, Dst) {
$FAIL($Fail);
}
header = *boxed_val(context);
- slots = $Slots;
+
+ /* Reserve a slot for the start position. */
+ slots = $Slots + 1;
live = $Live;
+
if (header_is_bin_matchstate(header)) {
ErlBinMatchState* ms = (ErlBinMatchState *) boxed_val(context);
Uint actual_slots = HEADER_NUM_SLOTS(header);
+
+ /* We're not compatible with contexts created by bs_start_match3. */
+ ASSERT(actual_slots >= 1);
+
ms->save_offset[0] = ms->mb.offset;
- if (actual_slots < slots) {
- ErlBinMatchState* dst;
+ if (ERTS_UNLIKELY(actual_slots < slots)) {
+ ErlBinMatchState* expanded;
Uint live = $Live;
Uint wordsneeded = ERL_BIN_MATCHSTATE_SIZE(slots);
-
$GC_TEST_PRESERVE(wordsneeded, live, context);
ms = (ErlBinMatchState *) boxed_val(context);
- dst = (ErlBinMatchState *) HTOP;
- *dst = *ms;
+ expanded = (ErlBinMatchState *) HTOP;
+ *expanded = *ms;
*HTOP = HEADER_BIN_MATCHSTATE(slots);
HTOP += wordsneeded;
HEAP_SPACE_VERIFIED(0);
- $Dst = make_matchstate(dst);
+ context = make_matchstate(expanded);
+ $REFRESH_GEN_DEST();
}
+ $Dst = context;
} else if (is_binary_header(header)) {
Eterm result;
Uint wordsneeded = ERL_BIN_MATCHSTATE_SIZE(slots);
@@ -758,6 +770,7 @@ bs_start_match.execute(Fail, Live, Slots, Dst) {
if (is_non_value(result)) {
$FAIL($Fail);
}
+ $REFRESH_GEN_DEST();
$Dst = result;
} else {
$FAIL($Fail);
@@ -906,6 +919,7 @@ i_bs_get_integer(Fail, Live, FlagsAndUnit, Ms, Sz, Dst) {
}
wordsneeded = 1+WSIZE(NBYTES((Uint) size));
$GC_TEST_PRESERVE(wordsneeded, $Live, ms);
+ $REFRESH_GEN_DEST();
}
mb = ms_matchbuffer(ms);
LIGHT_SWAPOUT;
@@ -939,6 +953,7 @@ i_bs_get_utf8(Ctx, Fail, Dst) {
if (is_non_value(result)) {
$FAIL($Fail);
}
+ $REFRESH_GEN_DEST();
$Dst = result;
}
@@ -949,6 +964,7 @@ i_bs_get_utf16(Ctx, Fail, Flags, Dst) {
if (is_non_value(result)) {
$FAIL($Fail);
}
+ $REFRESH_GEN_DEST();
$Dst = result;
}
@@ -1029,10 +1045,289 @@ i_bs_match_string(Ctx, Fail, Bits, Ptr) {
i_bs_save2(Src, Slot) {
ErlBinMatchState* _ms = (ErlBinMatchState*) boxed_val((Eterm) $Src);
+ ASSERT(HEADER_NUM_SLOTS(_ms->thing_word) > $Slot);
_ms->save_offset[$Slot] = _ms->mb.offset;
}
i_bs_restore2(Src, Slot) {
ErlBinMatchState* _ms = (ErlBinMatchState*) boxed_val((Eterm) $Src);
+ ASSERT(HEADER_NUM_SLOTS(_ms->thing_word) > $Slot);
_ms->mb.offset = _ms->save_offset[$Slot];
}
+
+bs_get_tail(Src, Dst, Live) {
+ ErlBinMatchBuffer* mb;
+ Uint size, offs;
+ ErlSubBin* sb;
+ Eterm context;
+
+ context = $Src;
+
+ ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+
+ $GC_TEST_PRESERVE(ERL_SUB_BIN_SIZE, $Live, context);
+
+ mb = ms_matchbuffer(context);
+
+ offs = mb->offset;
+ size = mb->size - offs;
+
+ sb = (ErlSubBin *) HTOP;
+ HTOP += ERL_SUB_BIN_SIZE;
+
+ sb->thing_word = HEADER_SUB_BIN;
+ sb->size = BYTE_OFFSET(size);
+ sb->bitsize = BIT_OFFSET(size);
+ sb->offs = BYTE_OFFSET(offs);
+ sb->bitoffs = BIT_OFFSET(offs);
+ sb->is_writable = 0;
+ sb->orig = mb->orig;
+
+ $REFRESH_GEN_DEST();
+ $Dst = make_binary(sb);
+}
+
+
+%if ARCH_64
+
+i_bs_start_match3_gp(Src, Live, Fail, Dst, Pos) {
+ Eterm context, header;
+ Uint position, live;
+
+ context = $Src;
+ live = $Live;
+
+ if (!is_boxed(context)) {
+ $FAIL($Fail);
+ }
+
+ header = *boxed_val(context);
+
+ if (header_is_bin_matchstate(header)) {
+ ErlBinMatchBuffer *mb;
+
+ ASSERT(HEADER_NUM_SLOTS(header) == 0);
+
+ mb = ms_matchbuffer(context);
+ position = mb->offset;
+
+ $Dst = context;
+ } else if (is_binary_header(header)) {
+ ErlBinMatchState *ms;
+
+ $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(0), live, context);
+ HEAP_TOP(c_p) = HTOP;
+#ifdef DEBUG
+ c_p->stop = E; /* Needed for checking in HeapOnlyAlloc(). */
+#endif
+ ms = erts_bs_start_match_3(c_p, context);
+ HTOP = HEAP_TOP(c_p);
+ HEAP_SPACE_VERIFIED(0);
+
+ if (ms == NULL) {
+ $FAIL($Fail);
+ }
+
+ $REFRESH_GEN_DEST();
+ $Dst = make_matchstate(ms);
+ position = ms->mb.offset;
+ } else {
+ $FAIL($Fail);
+ }
+
+ ASSERT(IS_USMALL(0, position));
+ $Pos = make_small(position);
+}
+
+i_bs_start_match3(Src, Live, Fail, Dst) {
+ Eterm context, header;
+ Uint live;
+
+ context = $Src;
+ live = $Live;
+
+ if (!is_boxed(context)) {
+ $FAIL($Fail);
+ }
+
+ header = *boxed_val(context);
+
+ if (header_is_bin_matchstate(header)) {
+ ASSERT(HEADER_NUM_SLOTS(header) == 0);
+ $Dst = context;
+ } else if (is_binary_header(header)) {
+ ErlBinMatchState *ms;
+
+ $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(0), live, context);
+ HEAP_TOP(c_p) = HTOP;
+#ifdef DEBUG
+ c_p->stop = E; /* Needed for checking in HeapOnlyAlloc(). */
+#endif
+ ms = erts_bs_start_match_3(c_p, context);
+ HTOP = HEAP_TOP(c_p);
+ HEAP_SPACE_VERIFIED(0);
+
+ if (ms == NULL) {
+ $FAIL($Fail);
+ }
+
+ $REFRESH_GEN_DEST();
+ $Dst = make_matchstate(ms);
+ } else {
+ $FAIL($Fail);
+ }
+}
+
+bs_set_position(Ctx, Pos) {
+ ErlBinMatchBuffer* mb;
+ Eterm context;
+
+ context = $Ctx;
+ ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+
+ mb = ms_matchbuffer(context);
+ mb->offset = unsigned_val($Pos);
+}
+
+i_bs_get_position(Ctx, Dst) {
+ ErlBinMatchBuffer* mb;
+ Eterm context;
+
+ context = $Ctx;
+ ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+
+ mb = ms_matchbuffer(context);
+ $Dst = make_small(mb->offset);
+}
+
+%else
+
+#
+# Unlike their 64-bit counterparts, the 32-bit position instructions operate on
+# an offset from the "base position" of the context because storing raw
+# positions would lead to the creation of far too many bigints.
+#
+# When a match context is reused we check whether its position fits into an
+# immediate, and create a new match context if it does not. This means we only
+# have to allocate stuff roughly once every 16MB rather than every time we
+# match at a position beyond 16MB.
+#
+
+bs_set_position(Ctx, Pos) {
+ Eterm context, position;
+ ErlBinMatchState *ms;
+
+ context = $Ctx;
+ position = $Pos;
+
+ ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+ ms = (ErlBinMatchState*)boxed_val(context);
+
+ if (ERTS_LIKELY(is_small(position))) {
+ ms->mb.offset = ms->save_offset[0] + unsigned_val(position);
+ } else {
+ ASSERT(is_big(position));
+ ms->mb.offset = ms->save_offset[0] + *BIG_V(big_val(position));
+ }
+}
+
+bs_get_position(Ctx, Dst, Live) {
+ ErlBinMatchState *ms;
+ Eterm context;
+ Uint position;
+
+ context = $Ctx;
+
+ ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+ ms = (ErlBinMatchState*)boxed_val(context);
+
+ position = ms->mb.offset - ms->save_offset[0];
+
+ if (ERTS_LIKELY(IS_USMALL(0, position))) {
+ $Dst = make_small(position);
+ } else {
+ Eterm *hp;
+
+ $GC_TEST_PRESERVE(BIG_UINT_HEAP_SIZE, $Live, context);
+
+ hp = HTOP;
+ HTOP += BIG_UINT_HEAP_SIZE;
+
+ *hp = make_pos_bignum_header(1);
+ BIG_DIGIT(hp, 0) = position;
+
+ $REFRESH_GEN_DEST();
+ $Dst = make_big(hp);
+ }
+}
+
+i_bs_start_match3(Src, Live, Fail, Dst) {
+ Eterm context, header;
+ Uint live;
+
+ context = $Src;
+ live = $Live;
+
+ if (!is_boxed(context)) {
+ $FAIL($Fail);
+ }
+
+ header = *boxed_val(context);
+
+ if (header_is_bin_matchstate(header)) {
+ ErlBinMatchState *current_ms;
+ Uint position;
+
+ ASSERT(HEADER_NUM_SLOTS(header) == 1);
+
+ current_ms = (ErlBinMatchState*)boxed_val(context);
+ position = current_ms->mb.offset - current_ms->save_offset[0];
+
+ if (ERTS_LIKELY(IS_USMALL(0, position))) {
+ $Dst = context;
+ } else {
+ ErlBinMatchState *new_ms;
+
+ $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(1), live, context);
+ current_ms = (ErlBinMatchState*)boxed_val(context);
+
+ new_ms = (ErlBinMatchState*)HTOP;
+ HTOP += ERL_BIN_MATCHSTATE_SIZE(1);
+
+ new_ms->thing_word = HEADER_BIN_MATCHSTATE(1);
+ new_ms->save_offset[0] = current_ms->mb.offset;
+ new_ms->mb = current_ms->mb;
+
+ $REFRESH_GEN_DEST();
+ $Dst = make_matchstate(new_ms);
+ }
+ } else if (is_binary_header(header)) {
+ Eterm result;
+
+ $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(1), live, context);
+ HEAP_TOP(c_p) = HTOP;
+
+#ifdef DEBUG
+ c_p->stop = E; /* Needed for checking in HeapOnlyAlloc(). */
+#endif
+
+ /* We intentionally use erts_bs_start_match_2 so that we can use
+ * save_offset as a base for all saved positions on this context,
+ * allowing us to avoid bigints for much longer. */
+ result = erts_bs_start_match_2(c_p, context, 1);
+
+ HTOP = HEAP_TOP(c_p);
+ HEAP_SPACE_VERIFIED(0);
+
+ if (is_non_value(result)) {
+ $FAIL($Fail);
+ }
+
+ $REFRESH_GEN_DEST();
+ $Dst = result;
+ } else {
+ $FAIL($Fail);
+ }
+}
+
+%endif
diff --git a/erts/emulator/beam/erl_bits.c b/erts/emulator/beam/erl_bits.c
index 3a16913473..e82c776e70 100644
--- a/erts/emulator/beam/erl_bits.c
+++ b/erts/emulator/beam/erl_bits.c
@@ -144,6 +144,42 @@ erts_bs_start_match_2(Process *p, Eterm Binary, Uint Max)
return make_matchstate(ms);
}
+ErlBinMatchState *erts_bs_start_match_3(Process *p, Eterm Binary)
+{
+ Eterm Orig;
+ Uint offs;
+ Uint* hp;
+ Uint NeededSize;
+ ErlBinMatchState *ms;
+ Uint bitoffs;
+ Uint bitsize;
+ Uint total_bin_size;
+ ProcBin* pb;
+
+ ASSERT(is_binary(Binary));
+ total_bin_size = binary_size(Binary);
+ if ((total_bin_size >> (8*sizeof(Uint)-3)) != 0) {
+ return NULL;
+ }
+
+ NeededSize = ERL_BIN_MATCHSTATE_SIZE(0);
+ hp = HeapOnlyAlloc(p, NeededSize);
+ ms = (ErlBinMatchState *) hp;
+ ERTS_GET_REAL_BIN(Binary, Orig, offs, bitoffs, bitsize);
+ pb = (ProcBin *) boxed_val(Orig);
+ if (pb->thing_word == HEADER_PROC_BIN && pb->flags != 0) {
+ erts_emasculate_writable_binary(pb);
+ }
+
+ ms->thing_word = HEADER_BIN_MATCHSTATE(0);
+ (ms->mb).orig = Orig;
+ (ms->mb).base = binary_bytes(Orig);
+ (ms->mb).offset = 8 * offs + bitoffs;
+ (ms->mb).size = total_bin_size * 8 + (ms->mb).offset + bitsize;
+
+ return ms;
+}
+
#ifdef DEBUG
# define CHECK_MATCH_BUFFER(MB) check_match_buffer(MB)
diff --git a/erts/emulator/beam/erl_bits.h b/erts/emulator/beam/erl_bits.h
index 7beef5cfda..50d353e1fa 100644
--- a/erts/emulator/beam/erl_bits.h
+++ b/erts/emulator/beam/erl_bits.h
@@ -73,12 +73,16 @@ struct erl_bits_state {
typedef struct erl_bin_match_struct{
Eterm thing_word;
ErlBinMatchBuffer mb; /* Present match buffer */
- Eterm save_offset[1]; /* Saved offsets */
+ Eterm save_offset[1]; /* Saved offsets, only valid for contexts
+ * created through bs_start_match2. */
} ErlBinMatchState;
-#define ERL_BIN_MATCHSTATE_SIZE(_Max) ((sizeof(ErlBinMatchState) + (_Max)*sizeof(Eterm))/sizeof(Eterm))
-#define HEADER_BIN_MATCHSTATE(_Max) _make_header(ERL_BIN_MATCHSTATE_SIZE((_Max))-1, _TAG_HEADER_BIN_MATCHSTATE)
-#define HEADER_NUM_SLOTS(hdr) (header_arity(hdr)-sizeof(ErlBinMatchState)/sizeof(Eterm)+1)
+#define ERL_BIN_MATCHSTATE_SIZE(_Max) \
+ ((offsetof(ErlBinMatchState, save_offset) + (_Max)*sizeof(Eterm))/sizeof(Eterm))
+#define HEADER_BIN_MATCHSTATE(_Max) \
+ _make_header(ERL_BIN_MATCHSTATE_SIZE((_Max)) - 1, _TAG_HEADER_BIN_MATCHSTATE)
+#define HEADER_NUM_SLOTS(hdr) \
+ (header_arity(hdr) - (offsetof(ErlBinMatchState, save_offset) / sizeof(Eterm)) + 1)
#define make_matchstate(_Ms) make_boxed((Eterm*)(_Ms))
#define ms_matchbuffer(_Ms) &(((ErlBinMatchState*) boxed_val(_Ms))->mb)
@@ -144,6 +148,7 @@ void erts_bits_destroy_state(ERL_BITS_PROTO_0);
*/
Eterm erts_bs_start_match_2(Process *p, Eterm Bin, Uint Max);
+ErlBinMatchState *erts_bs_start_match_3(Process *p, Eterm Bin);
Eterm erts_bs_get_integer_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb);
Eterm erts_bs_get_binary_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb);
Eterm erts_bs_get_float_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb);
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index d859c4bb24..349034e8ac 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -1089,21 +1089,21 @@ func_info M F A => i_func_info u M F A
%warm
bs_start_match2 Fail=f ica X Y D => jump Fail
bs_start_match2 Fail Bin X Y D => i_bs_start_match2 Bin Fail X Y D
-i_bs_start_match2 xy f t t x
+i_bs_start_match2 xy f t t d
bs_save2 Reg Index => gen_bs_save(Reg, Index)
-i_bs_save2 x t
+i_bs_save2 xy t
bs_restore2 Reg Index => gen_bs_restore(Reg, Index)
-i_bs_restore2 x t
+i_bs_restore2 xy t
# Matching integers
bs_match_string Fail Ms Bits Val => i_bs_match_string Ms Fail Bits Val
-i_bs_match_string x f W W
+i_bs_match_string xy f W W
# Fetching integers from binaries.
-bs_get_integer2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \
+bs_get_integer2 Fail=f Ms=xy Live=u Sz=sq Unit=u Flags=u Dst=d => \
gen_get_integer2(Fail, Ms, Live, Sz, Unit, Flags, Dst)
i_bs_get_integer_small_imm Ms Bits Fail Flags Y=y => \
@@ -1112,50 +1112,50 @@ i_bs_get_integer_small_imm Ms Bits Fail Flags Y=y => \
i_bs_get_integer_imm Ms Bits Live Fail Flags Y=y => \
i_bs_get_integer_imm Ms Bits Live Fail Flags x | move x Y
-i_bs_get_integer_small_imm x W f? t x
-i_bs_get_integer_imm x W t f? t x
-i_bs_get_integer f? t t x s xy
-i_bs_get_integer_8 x f? xy
-i_bs_get_integer_16 x f? xy
+i_bs_get_integer_small_imm xy W f? t x
+i_bs_get_integer_imm xy W t f? t x
+i_bs_get_integer f? t t xy s d
+i_bs_get_integer_8 xy f? d
+i_bs_get_integer_16 xy f? d
%if ARCH_64
-i_bs_get_integer_32 x f? xy
+i_bs_get_integer_32 xy f? d
%endif
# Fetching binaries from binaries.
-bs_get_binary2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \
+bs_get_binary2 Fail=f Ms=xy Live=u Sz=sq Unit=u Flags=u Dst=d => \
gen_get_binary2(Fail, Ms, Live, Sz, Unit, Flags, Dst)
-i_bs_get_binary_imm2 f? x t W t xy
-i_bs_get_binary2 f x t? s t xy
-i_bs_get_binary_all2 f? x t t xy
-i_bs_get_binary_all_reuse x f? t
+i_bs_get_binary_imm2 f? xy t W t d
+i_bs_get_binary2 f xy t? s t d
+i_bs_get_binary_all2 f? xy t t d
+i_bs_get_binary_all_reuse xy f? t
# Fetching float from binaries.
-bs_get_float2 Fail=f Ms=x Live=u Sz=s Unit=u Flags=u Dst=d => \
+bs_get_float2 Fail=f Ms=xy Live=u Sz=s Unit=u Flags=u Dst=d => \
gen_get_float2(Fail, Ms, Live, Sz, Unit, Flags, Dst)
bs_get_float2 Fail=f Ms=x Live=u Sz=q Unit=u Flags=u Dst=d => jump Fail
-i_bs_get_float2 f? x t s t xy
+i_bs_get_float2 f? xy t s t d
# Miscellanous
-bs_skip_bits2 Fail=f Ms=x Sz=sq Unit=u Flags=u => \
+bs_skip_bits2 Fail=f Ms=xy Sz=sq Unit=u Flags=u => \
gen_skip_bits2(Fail, Ms, Sz, Unit, Flags)
-i_bs_skip_bits_imm2 f? x W
-i_bs_skip_bits2 f? x xy t
-i_bs_skip_bits_all2 f? x t
+i_bs_skip_bits_imm2 f? xy W
+i_bs_skip_bits2 f? xy xy t
+i_bs_skip_bits_all2 f? xy t
-bs_test_tail2 Fail=f Ms=x Bits=u==0 => bs_test_zero_tail2 Fail Ms
-bs_test_tail2 Fail=f Ms=x Bits=u => bs_test_tail_imm2 Fail Ms Bits
-bs_test_zero_tail2 f? x
-bs_test_tail_imm2 f? x W
+bs_test_tail2 Fail=f Ms=xy Bits=u==0 => bs_test_zero_tail2 Fail Ms
+bs_test_tail2 Fail=f Ms=xy Bits=u => bs_test_tail_imm2 Fail Ms Bits
+bs_test_zero_tail2 f? xy
+bs_test_tail_imm2 f? xy W
bs_test_unit F Ms Unit=u==8 => bs_test_unit8 F Ms
-bs_test_unit f? x t
-bs_test_unit8 f? x
+bs_test_unit f? xy t
+bs_test_unit8 f? xy
# An y register operand for bs_context_to_binary is rare,
# but can happen because of inlining.
@@ -1167,23 +1167,55 @@ bs_context_to_binary Y=y => move Y x | bs_context_to_binary x
bs_context_to_binary x
+# Gets a bitstring from the tail of a context.
+bs_get_tail xy d t
+
+# New bs_start_match variant for contexts with external position storage.
+#
+# bs_get/set_position is used to save positions into registers instead of
+# "slots" in the context itself, which lets us continue matching even after
+# we've passed it off to another function.
+
+%if ARCH_64
+bs_start_match3 Fail Bin Live Ctx | bs_get_position Ctx Pos=x Ignored => \
+ i_bs_start_match3_gp Bin Live Fail Ctx Pos
+i_bs_start_match3_gp xy t f d x
+%endif
+
+bs_start_match3 Fail=f ica Live Dst => jump Fail
+bs_start_match3 Fail Bin Live Dst => i_bs_start_match3 Bin Live Fail Dst
+
+i_bs_start_match3 xy t f d
+
+# Match context position instructions. 64-bit assumes that all positions can
+# fit into an unsigned small.
+
+%if ARCH_64
+ bs_get_position Src Dst Live => i_bs_get_position Src Dst
+ i_bs_get_position xy xy
+ bs_set_position xy xy
+%else
+ bs_get_position xy d t?
+ bs_set_position xy xy
+%endif
+
#
# Utf8/utf16/utf32 support. (R12B-5)
#
-bs_get_utf8 Fail=f Ms=x u u Dst=d => i_bs_get_utf8 Ms Fail Dst
-i_bs_get_utf8 x f? xy
+bs_get_utf8 Fail=f Ms=xy u u Dst=d => i_bs_get_utf8 Ms Fail Dst
+i_bs_get_utf8 xy f? d
-bs_skip_utf8 Fail=f Ms=x u u => i_bs_get_utf8 Ms Fail x
+bs_skip_utf8 Fail=f Ms=xy u u => i_bs_get_utf8 Ms Fail x
-bs_get_utf16 Fail=f Ms=x u Flags=u Dst=d => i_bs_get_utf16 Ms Fail Flags Dst
-bs_skip_utf16 Fail=f Ms=x u Flags=u => i_bs_get_utf16 Ms Fail Flags x
+bs_get_utf16 Fail=f Ms=xy u Flags=u Dst=d => i_bs_get_utf16 Ms Fail Flags Dst
+bs_skip_utf16 Fail=f Ms=xy u Flags=u => i_bs_get_utf16 Ms Fail Flags x
-i_bs_get_utf16 x f? t xy
+i_bs_get_utf16 xy f? t d
-bs_get_utf32 Fail=f Ms=x Live=u Flags=u Dst=d => \
+bs_get_utf32 Fail=f Ms=xy Live=u Flags=u Dst=d => \
bs_get_integer2 Fail Ms Live i=32 u=1 Flags Dst | \
i_bs_validate_unicode_retract Fail Dst Ms
-bs_skip_utf32 Fail=f Ms=x Live=u Flags=u => \
+bs_skip_utf32 Fail=f Ms=xy Live=u Flags=u => \
bs_get_integer2 Fail Ms Live i=32 u=1 Flags x | \
i_bs_validate_unicode_retract Fail x Ms