diff options
author | Rickard Green <[email protected]> | 2017-03-27 14:39:49 +0200 |
---|---|---|
committer | Rickard Green <[email protected]> | 2017-04-05 14:36:43 +0200 |
commit | 74f11bdee247626ed6a3afe558bde4989687e642 (patch) | |
tree | a4ca0966d941c35f94825a73d7191ea96f34d4f6 /erts/emulator/pcre/pcre_jit_compile.c | |
parent | a748cafdc7063d9f181ba12088db6458793ced2f (diff) | |
download | otp-74f11bdee247626ed6a3afe558bde4989687e642.tar.gz otp-74f11bdee247626ed6a3afe558bde4989687e642.tar.bz2 otp-74f11bdee247626ed6a3afe558bde4989687e642.zip |
Update PCRE to version 8.40
Diffstat (limited to 'erts/emulator/pcre/pcre_jit_compile.c')
-rw-r--r-- | erts/emulator/pcre/pcre_jit_compile.c | 5966 |
1 files changed, 3970 insertions, 1996 deletions
diff --git a/erts/emulator/pcre/pcre_jit_compile.c b/erts/emulator/pcre/pcre_jit_compile.c index fb26c62817..89400498f0 100644 --- a/erts/emulator/pcre/pcre_jit_compile.c +++ b/erts/emulator/pcre/pcre_jit_compile.c @@ -52,8 +52,8 @@ POSSIBILITY OF SUCH DAMAGE. we just include it. This way we don't need to touch the build system files. */ -#define SLJIT_MALLOC(size) (PUBL(malloc))(size) -#define SLJIT_FREE(ptr) (PUBL(free))(ptr) +#define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size) +#define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr) #define SLJIT_CONFIG_AUTO 1 #define SLJIT_CONFIG_STATIC 1 #define SLJIT_VERBOSE 0 @@ -168,22 +168,23 @@ typedef struct jit_arguments { pcre_uchar *mark_ptr; void *callout_data; /* Everything else after. */ - pcre_uint32 limit_match; + sljit_u32 limit_match; int real_offset_count; int offset_count; - pcre_uint8 notbol; - pcre_uint8 noteol; - pcre_uint8 notempty; - pcre_uint8 notempty_atstart; + sljit_u8 notbol; + sljit_u8 noteol; + sljit_u8 notempty; + sljit_u8 notempty_atstart; } jit_arguments; typedef struct executable_functions { void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES]; + void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES]; + sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; PUBL(jit_callback) callback; void *userdata; - pcre_uint32 top_bracket; - pcre_uint32 limit_match; - sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; + sljit_u32 top_bracket; + sljit_u32 limit_match; } executable_functions; typedef struct jump_list { @@ -197,6 +198,12 @@ typedef struct stub_list { struct stub_list *next; } stub_list; +typedef struct label_addr_list { + struct sljit_label *label; + sljit_uw *update_addr; + struct label_addr_list *next; +} label_addr_list; + enum frame_types { no_frame = -1, no_stack = -2 @@ -270,11 +277,25 @@ typedef struct braminzero_backtrack { struct sljit_label *matchingpath; } braminzero_backtrack; -typedef struct iterator_backtrack { +typedef struct char_iterator_backtrack { + backtrack_common common; + /* Next iteration. */ + struct sljit_label *matchingpath; + union { + jump_list *backtracks; + struct { + unsigned int othercasebit; + pcre_uchar chr; + BOOL enabled; + } charpos; + } u; +} char_iterator_backtrack; + +typedef struct ref_iterator_backtrack { backtrack_common common; /* Next iteration. */ struct sljit_label *matchingpath; -} iterator_backtrack; +} ref_iterator_backtrack; typedef struct recurse_entry { struct recurse_entry *next; @@ -306,7 +327,7 @@ typedef struct then_trap_backtrack { int framesize; } then_trap_backtrack; -#define MAX_RANGE_SIZE 6 +#define MAX_RANGE_SIZE 4 typedef struct compiler_common { /* The sljit ceneric compiler. */ @@ -314,64 +335,77 @@ typedef struct compiler_common { /* First byte code. */ pcre_uchar *start; /* Maps private data offset to each opcode. */ - sljit_si *private_data_ptrs; + sljit_s32 *private_data_ptrs; + /* Chain list of read-only data ptrs. */ + void *read_only_data_head; /* Tells whether the capturing bracket is optimized. */ - pcre_uint8 *optimized_cbracket; + sljit_u8 *optimized_cbracket; /* Tells whether the starting offset is a target of then. */ - pcre_uint8 *then_offsets; + sljit_u8 *then_offsets; /* Current position where a THEN must jump. */ then_trap_backtrack *then_trap; /* Starting offset of private data for capturing brackets. */ - int cbra_ptr; + sljit_s32 cbra_ptr; /* Output vector starting point. Must be divisible by 2. */ - int ovector_start; + sljit_s32 ovector_start; + /* Points to the starting character of the current match. */ + sljit_s32 start_ptr; /* Last known position of the requested byte. */ - int req_char_ptr; + sljit_s32 req_char_ptr; /* Head of the last recursion. */ - int recursive_head_ptr; - /* First inspected character for partial matching. */ - int start_used_ptr; + sljit_s32 recursive_head_ptr; + /* First inspected character for partial matching. + (Needed for avoiding zero length partial matches.) */ + sljit_s32 start_used_ptr; /* Starting pointer for partial soft matches. */ - int hit_start; - /* End pointer of the first line. */ - int first_line_end; + sljit_s32 hit_start; + /* Pointer of the match end position. */ + sljit_s32 match_end_ptr; /* Points to the marked string. */ - int mark_ptr; + sljit_s32 mark_ptr; /* Recursive control verb management chain. */ - int control_head_ptr; + sljit_s32 control_head_ptr; /* Points to the last matched capture block index. */ - int capture_last_ptr; - /* Points to the starting position of the current match. */ - int start_ptr; + sljit_s32 capture_last_ptr; + /* Fast forward skipping byte code pointer. */ + pcre_uchar *fast_forward_bc_ptr; + /* Locals used by fast fail optimization. */ + sljit_s32 fast_fail_start_ptr; + sljit_s32 fast_fail_end_ptr; /* Flipped and lower case tables. */ - const pcre_uint8 *fcc; + const sljit_u8 *fcc; sljit_sw lcc; /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */ int mode; + /* TRUE, when minlength is greater than 0. */ + BOOL might_be_empty; /* \K is found in the pattern. */ BOOL has_set_som; /* (*SKIP:arg) is found in the pattern. */ BOOL has_skip_arg; /* (*THEN) is found in the pattern. */ BOOL has_then; - /* Needs to know the start position anytime. */ - BOOL needs_start_ptr; + /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */ + BOOL has_skip_in_assert_back; /* Currently in recurse or negative assert. */ BOOL local_exit; /* Currently in a positive assert. */ BOOL positive_assert; /* Newline control. */ int nltype; + sljit_u32 nlmax; + sljit_u32 nlmin; int newline; int bsr_nltype; + sljit_u32 bsr_nlmax; + sljit_u32 bsr_nlmin; /* Dollar endonly. */ int endonly; /* Tables. */ sljit_sw ctypes; - int digits[2 + MAX_RANGE_SIZE]; /* Named capturing brackets. */ - sljit_uw name_table; + pcre_uchar *name_table; sljit_sw name_count; sljit_sw name_entry_size; @@ -380,7 +414,9 @@ typedef struct compiler_common { struct sljit_label *quit_label; struct sljit_label *forced_quit_label; struct sljit_label *accept_label; + struct sljit_label *ff_newline_shortcut; stub_list *stubs; + label_addr_list *label_addrs; recurse_entry *entries; recurse_entry *currententry; jump_list *partialmatch; @@ -403,17 +439,14 @@ typedef struct compiler_common { BOOL utf; #ifdef SUPPORT_UCP BOOL use_ucp; -#endif -#ifndef COMPILE_PCRE32 - jump_list *utfreadchar; + jump_list *getucd; #endif #ifdef COMPILE_PCRE8 + jump_list *utfreadchar; + jump_list *utfreadchar16; jump_list *utfreadtype8; #endif #endif /* SUPPORT_UTF */ -#ifdef SUPPORT_UCP - jump_list *getucd; -#endif } compiler_common; /* For byte_sequence_compare. */ @@ -424,27 +457,27 @@ typedef struct compare_context { #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED int ucharptr; union { - sljit_si asint; - sljit_uh asushort; + sljit_s32 asint; + sljit_u16 asushort; #if defined COMPILE_PCRE8 - sljit_ub asbyte; - sljit_ub asuchars[4]; + sljit_u8 asbyte; + sljit_u8 asuchars[4]; #elif defined COMPILE_PCRE16 - sljit_uh asuchars[2]; + sljit_u16 asuchars[2]; #elif defined COMPILE_PCRE32 - sljit_ui asuchars[1]; + sljit_u32 asuchars[1]; #endif } c; union { - sljit_si asint; - sljit_uh asushort; + sljit_s32 asint; + sljit_u16 asushort; #if defined COMPILE_PCRE8 - sljit_ub asbyte; - sljit_ub asuchars[4]; + sljit_u8 asbyte; + sljit_u8 asuchars[4]; #elif defined COMPILE_PCRE16 - sljit_uh asuchars[2]; + sljit_u16 asuchars[2]; #elif defined COMPILE_PCRE32 - sljit_ui asuchars[1]; + sljit_u32 asuchars[1]; #endif } oc; #endif @@ -456,16 +489,16 @@ typedef struct compare_context { /* Used for accessing the elements of the stack. */ #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw)) -#define TMP1 SLJIT_SCRATCH_REG1 -#define TMP2 SLJIT_SCRATCH_REG3 -#define TMP3 SLJIT_TEMPORARY_EREG2 -#define STR_PTR SLJIT_SAVED_REG1 -#define STR_END SLJIT_SAVED_REG2 -#define STACK_TOP SLJIT_SCRATCH_REG2 -#define STACK_LIMIT SLJIT_SAVED_REG3 -#define ARGUMENTS SLJIT_SAVED_EREG1 -#define COUNT_MATCH SLJIT_SAVED_EREG2 -#define RETURN_ADDR SLJIT_TEMPORARY_EREG1 +#define TMP1 SLJIT_R0 +#define TMP2 SLJIT_R2 +#define TMP3 SLJIT_R3 +#define STR_PTR SLJIT_S0 +#define STR_END SLJIT_S1 +#define STACK_TOP SLJIT_R1 +#define STACK_LIMIT SLJIT_S2 +#define COUNT_MATCH SLJIT_S3 +#define ARGUMENTS SLJIT_S4 +#define RETURN_ADDR SLJIT_R4 /* Local space layout. */ /* These two locals can be used by the current opcode. */ @@ -481,19 +514,19 @@ to characters. The vector data is divided into two groups: the first group contains the start / end character pointers, and the second is the start pointers when the end of the capturing group has not yet reached. */ #define OVECTOR_START (common->ovector_start) -#define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw)) -#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw)) +#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw)) +#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw)) #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start]) #if defined COMPILE_PCRE8 -#define MOV_UCHAR SLJIT_MOV_UB -#define MOVU_UCHAR SLJIT_MOVU_UB +#define MOV_UCHAR SLJIT_MOV_U8 +#define MOVU_UCHAR SLJIT_MOVU_U8 #elif defined COMPILE_PCRE16 -#define MOV_UCHAR SLJIT_MOV_UH -#define MOVU_UCHAR SLJIT_MOVU_UH +#define MOV_UCHAR SLJIT_MOV_U16 +#define MOVU_UCHAR SLJIT_MOVU_U16 #elif defined COMPILE_PCRE32 -#define MOV_UCHAR SLJIT_MOV_UI -#define MOVU_UCHAR SLJIT_MOVU_UI +#define MOV_UCHAR SLJIT_MOV_U32 +#define MOVU_UCHAR SLJIT_MOVU_U32 #else #error Unsupported compiling mode #endif @@ -524,7 +557,9 @@ the start pointers when the end of the capturing group has not yet reached. */ #define GET_LOCAL_BASE(dst, dstw, offset) \ sljit_get_local_base(compiler, (dst), (dstw), (offset)) -static pcre_uchar* bracketend(pcre_uchar* cc) +#define READ_CHAR_MAX 0x7fffffff + +static pcre_uchar *bracketend(pcre_uchar *cc) { SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); do cc += GET(cc, 1); while (*cc == OP_ALT); @@ -533,6 +568,20 @@ cc += 1 + LINK_SIZE; return cc; } +static int no_alternatives(pcre_uchar *cc) +{ +int count = 0; +SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); +do + { + cc += GET(cc, 1); + count++; + } +while (*cc == OP_ALT); +SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); +return count; +} + /* Functions whose might need modification for all new supported opcodes: next_opcode check_opcode_types @@ -585,10 +634,16 @@ switch(*cc) case OP_CRMINQUERY: case OP_CRRANGE: case OP_CRMINRANGE: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + case OP_CRPOSQUERY: + case OP_CRPOSRANGE: case OP_CLASS: case OP_NCLASS: case OP_REF: case OP_REFI: + case OP_DNREF: + case OP_DNREFI: case OP_RECURSE: case OP_CALLOUT: case OP_ALT: @@ -614,9 +669,9 @@ switch(*cc) case OP_SCBRAPOS: case OP_SCOND: case OP_CREF: - case OP_NCREF: + case OP_DNCREF: case OP_RREF: - case OP_NRREF: + case OP_DNRREF: case OP_DEF: case OP_BRAZERO: case OP_BRAMINZERO: @@ -736,10 +791,9 @@ switch(*cc) static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) { -pcre_uchar *name; -pcre_uchar *name2; -unsigned int cbra_index; -int i; +int count; +pcre_uchar *slot; +pcre_uchar *assert_back_end = cc - 1; /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */ while (cc < ccend) @@ -748,6 +802,7 @@ while (cc < ccend) { case OP_SET_SOM: common->has_set_som = TRUE; + common->might_be_empty = TRUE; cc += 1; break; @@ -773,29 +828,21 @@ while (cc < ccend) break; case OP_CREF: - i = GET2(cc, 1); - common->optimized_cbracket[i] = 0; + common->optimized_cbracket[GET2(cc, 1)] = 0; cc += 1 + IMM2_SIZE; break; - case OP_NCREF: - cbra_index = GET2(cc, 1); - name = (pcre_uchar *)common->name_table; - name2 = name; - for (i = 0; i < common->name_count; i++) - { - if (GET2(name, 0) == cbra_index) break; - name += common->name_entry_size; - } - SLJIT_ASSERT(i != common->name_count); - - for (i = 0; i < common->name_count; i++) + case OP_DNREF: + case OP_DNREFI: + case OP_DNCREF: + count = GET2(cc, 1 + IMM2_SIZE); + slot = common->name_table + GET2(cc, 1) * common->name_entry_size; + while (count-- > 0) { - if (STRCMP_UC_UC(name2 + IMM2_SIZE, name + IMM2_SIZE) == 0) - common->optimized_cbracket[GET2(name2, 0)] = 0; - name2 += common->name_entry_size; + common->optimized_cbracket[GET2(slot, 0)] = 0; + slot += common->name_entry_size; } - cc += 1 + IMM2_SIZE; + cc += 1 + 2 * IMM2_SIZE; break; case OP_RECURSE: @@ -817,15 +864,19 @@ while (cc < ccend) cc += 2 + 2 * LINK_SIZE; break; + case OP_ASSERTBACK: + slot = bracketend(cc); + if (slot > assert_back_end) + assert_back_end = slot; + cc += 1 + LINK_SIZE; + break; + case OP_THEN_ARG: common->has_then = TRUE; common->control_head_ptr = 1; /* Fall through. */ case OP_PRUNE_ARG: - common->needs_start_ptr = TRUE; - /* Fall through. */ - case OP_MARK: if (common->mark_ptr == 0) { @@ -838,17 +889,20 @@ while (cc < ccend) case OP_THEN: common->has_then = TRUE; common->control_head_ptr = 1; - /* Fall through. */ + cc += 1; + break; - case OP_PRUNE: case OP_SKIP: - common->needs_start_ptr = TRUE; + if (cc < assert_back_end) + common->has_skip_in_assert_back = TRUE; cc += 1; break; case OP_SKIP_ARG: common->control_head_ptr = 1; common->has_skip_arg = TRUE; + if (cc < assert_back_end) + common->has_skip_in_assert_back = TRUE; cc += 1 + 2 + cc[1]; break; @@ -862,8 +916,189 @@ while (cc < ccend) return TRUE; } +static BOOL is_accelerated_repeat(pcre_uchar *cc) +{ +switch(*cc) + { + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI); + + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_POSSTAR: + case OP_POSPLUS: + + case OP_STARI: + case OP_MINSTARI: + case OP_PLUSI: + case OP_MINPLUSI: + case OP_POSSTARI: + case OP_POSPLUSI: + + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + + case OP_NOTSTARI: + case OP_NOTMINSTARI: + case OP_NOTPLUSI: + case OP_NOTMINPLUSI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + return TRUE; + + case OP_CLASS: + case OP_NCLASS: +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 + case OP_XCLASS: + cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar))); +#else + cc += (1 + (32 / sizeof(pcre_uchar))); +#endif + + switch(*cc) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + return TRUE; + } + break; + } +return FALSE; +} + +static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start) +{ +pcre_uchar *cc = common->start; +pcre_uchar *end; + +/* Skip not repeated brackets. */ +while (TRUE) + { + switch(*cc) + { + case OP_SOD: + case OP_SOM: + case OP_SET_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + /* Zero width assertions. */ + cc++; + continue; + } + + if (*cc != OP_BRA && *cc != OP_CBRA) + break; + + end = cc + GET(cc, 1); + if (*end != OP_KET || PRIVATE_DATA(end) != 0) + return FALSE; + if (*cc == OP_CBRA) + { + if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + return FALSE; + cc += IMM2_SIZE; + } + cc += 1 + LINK_SIZE; + } + +if (is_accelerated_repeat(cc)) + { + common->fast_forward_bc_ptr = cc; + common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start; + *private_data_start += sizeof(sljit_sw); + return TRUE; + } +return FALSE; +} + +static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth) +{ + pcre_uchar *next_alt; + + SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA); + + if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + return; + + next_alt = bracketend(cc) - (1 + LINK_SIZE); + if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0) + return; + + do + { + next_alt = cc + GET(cc, 1); + + cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0); + + while (TRUE) + { + switch(*cc) + { + case OP_SOD: + case OP_SOM: + case OP_SET_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + /* Zero width assertions. */ + cc++; + continue; + } + break; + } + + if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA)) + detect_fast_fail(common, cc, private_data_start, depth - 1); + + if (is_accelerated_repeat(cc)) + { + common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start; + + if (common->fast_fail_start_ptr == 0) + common->fast_fail_start_ptr = *private_data_start; + + *private_data_start += sizeof(sljit_sw); + common->fast_fail_end_ptr = *private_data_start; + + if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) + return; + } + + cc = next_alt; + } + while (*cc == OP_ALT); +} + static int get_class_iterator_size(pcre_uchar *cc) { +sljit_u32 min; +sljit_u32 max; switch(*cc) { case OP_CRSTAR: @@ -878,9 +1113,14 @@ switch(*cc) case OP_CRRANGE: case OP_CRMINRANGE: - if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE)) - return 0; - return 2; + min = GET2(cc, 1); + max = GET2(cc, 1 + IMM2_SIZE); + if (max == 0) + return (*cc == OP_CRRANGE) ? 2 : 1; + max -= min; + if (max > 2) + max = 2; + return max; default: return 0; @@ -1031,6 +1271,7 @@ pcre_uchar *alternative; pcre_uchar *end = NULL; int private_data_ptr = *private_data_start; int space, size, bracketlen; +BOOL repeat_check = TRUE; while (cc < ccend) { @@ -1038,9 +1279,10 @@ while (cc < ccend) size = 0; bracketlen = 0; if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE) - return; + break; - if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND) + if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)) + { if (detect_repeat(common, cc)) { /* These brackets are converted to repeats, so no global @@ -1048,6 +1290,8 @@ while (cc < ccend) if (cc >= end) end = bracketend(cc); } + } + repeat_check = TRUE; switch(*cc) { @@ -1103,6 +1347,13 @@ while (cc < ccend) bracketlen = 1 + LINK_SIZE + IMM2_SIZE; break; + case OP_BRAZERO: + case OP_BRAMINZERO: + case OP_BRAPOSZERO: + repeat_check = FALSE; + size = 1; + break; + CASE_ITERATOR_PRIVATE_DATA_1 space = 1; size = -2; @@ -1129,22 +1380,27 @@ while (cc < ccend) size = 1; break; - CASE_ITERATOR_TYPE_PRIVATE_DATA_2B + case OP_TYPEUPTO: if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI) space = 2; size = 1 + IMM2_SIZE; break; + case OP_TYPEMINUPTO: + space = 2; + size = 1 + IMM2_SIZE; + break; + case OP_CLASS: case OP_NCLASS: - size += 1 + 32 / sizeof(pcre_uchar); space = get_class_iterator_size(cc + size); + size = 1 + 32 / sizeof(pcre_uchar); break; #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 case OP_XCLASS: - size = GET(cc, 1); space = get_class_iterator_size(cc + size); + size = GET(cc, 1); break; #endif @@ -1190,7 +1446,7 @@ while (cc < ccend) } /* Returns with a frame_types (always < 0) if no need for frame. */ -static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head) +static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head) { int length = 0; int possessive = 0; @@ -1283,6 +1539,13 @@ while (cc < ccend) cc += 1 + LINK_SIZE + IMM2_SIZE; break; + case OP_THEN: + stack_restore = TRUE; + if (common->control_head_ptr != 0) + *needs_control_head = TRUE; + cc ++; + break; + default: stack_restore = TRUE; /* Fall through. */ @@ -1350,6 +1613,7 @@ while (cc < ccend) case OP_CLASS: case OP_NCLASS: case OP_XCLASS: + case OP_CALLOUT: cc = next_opcode(common, cc); SLJIT_ASSERT(cc != NULL); @@ -1394,7 +1658,7 @@ while (cc < ccend) SLJIT_ASSERT(common->has_set_som); if (!setsom_found) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); stackpos += (int)sizeof(sljit_sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); @@ -1410,7 +1674,7 @@ while (cc < ccend) SLJIT_ASSERT(common->mark_ptr != 0); if (!setmark_found) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); stackpos += (int)sizeof(sljit_sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); @@ -1423,7 +1687,7 @@ while (cc < ccend) case OP_RECURSE: if (common->has_set_som && !setsom_found) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); stackpos += (int)sizeof(sljit_sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); @@ -1432,7 +1696,7 @@ while (cc < ccend) } if (common->mark_ptr != 0 && !setmark_found) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); stackpos += (int)sizeof(sljit_sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); @@ -1441,7 +1705,7 @@ while (cc < ccend) } if (common->capture_last_ptr != 0 && !capture_last_found) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); stackpos += (int)sizeof(sljit_sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); @@ -1457,7 +1721,7 @@ while (cc < ccend) case OP_SCBRAPOS: if (common->capture_last_ptr != 0 && !capture_last_found) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); stackpos += (int)sizeof(sljit_sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); @@ -1467,8 +1731,8 @@ while (cc < ccend) offset = (GET2(cc, 1 + LINK_SIZE)) << 1; OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset)); stackpos += (int)sizeof(sljit_sw); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); stackpos += (int)sizeof(sljit_sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); @@ -1500,7 +1764,11 @@ while (cc < ccend) { case OP_KET: if (PRIVATE_DATA(cc) != 0) + { private_data_length++; + SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); + cc += PRIVATE_DATA(cc + 1); + } cc += 1 + LINK_SIZE; break; @@ -1515,6 +1783,7 @@ while (cc < ccend) case OP_SBRAPOS: case OP_SCOND: private_data_length++; + SLJIT_ASSERT(PRIVATE_DATA(cc) != 0); cc += 1 + LINK_SIZE; break; @@ -1677,6 +1946,8 @@ do { count = 1; srcw[0] = PRIVATE_DATA(cc); + SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); + cc += PRIVATE_DATA(cc + 1); } cc += 1 + LINK_SIZE; break; @@ -1848,7 +2119,7 @@ do OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); stackptr += sizeof(sljit_sw); } - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]); tmp1empty = FALSE; tmp1next = FALSE; } @@ -1859,7 +2130,7 @@ do OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); stackptr += sizeof(sljit_sw); } - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]); tmp2empty = FALSE; tmp1next = TRUE; } @@ -1869,7 +2140,7 @@ do if (tmp1next) { SLJIT_ASSERT(!tmp1empty); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0); tmp1empty = stackptr >= stacktop; if (!tmp1empty) { @@ -1881,7 +2152,7 @@ do else { SLJIT_ASSERT(!tmp2empty); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0); tmp2empty = stackptr >= stacktop; if (!tmp2empty) { @@ -1927,7 +2198,7 @@ if (save) SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty))); } -static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset) +static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset) { pcre_uchar *end = bracketend(cc); BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT; @@ -1983,7 +2254,7 @@ while (list) } } -static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump) +static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump) { jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list)); if (list_item) @@ -1997,7 +2268,7 @@ if (list_item) static void add_stub(compiler_common *common, struct sljit_jump *start) { DEFINE_COMPILER; -stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list)); +stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list)); if (list_item) { @@ -2011,7 +2282,7 @@ if (list_item) static void flush_stubs(compiler_common *common) { DEFINE_COMPILER; -stub_list* list_item = common->stubs; +stub_list *list_item = common->stubs; while (list_item) { @@ -2023,12 +2294,26 @@ while (list_item) common->stubs = NULL; } +static void add_label_addr(compiler_common *common, sljit_uw *update_addr) +{ +DEFINE_COMPILER; +label_addr_list *label_addr; + +label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list)); +if (label_addr == NULL) + return; +label_addr->label = LABEL(); +label_addr->update_addr = update_addr; +label_addr->next = common->label_addrs; +common->label_addrs = label_addr; +} + static SLJIT_INLINE void count_match(compiler_common *common) { DEFINE_COMPILER; OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1); -add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO)); +add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO)); } static SLJIT_INLINE void allocate_stack(compiler_common *common, int size) @@ -2036,23 +2321,60 @@ static SLJIT_INLINE void allocate_stack(compiler_common *common, int size) /* May destroy all locals and registers except TMP2. */ DEFINE_COMPILER; +SLJIT_ASSERT(size > 0); OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); #ifdef DESTROY_REGISTERS OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345); OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0); #endif -add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0)); +add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0)); } static SLJIT_INLINE void free_stack(compiler_common *common, int size) { DEFINE_COMPILER; + +SLJIT_ASSERT(size > 0); OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); } +static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size) +{ +DEFINE_COMPILER; +sljit_uw *result; + +if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + return NULL; + +result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data); +if (SLJIT_UNLIKELY(result == NULL)) + { + sljit_set_compiler_memory_error(compiler); + return NULL; + } + +*(void**)result = common->read_only_data_head; +common->read_only_data_head = (void *)result; +return result + 1; +} + +static void free_read_only_data(void *current, void *allocator_data) +{ +void *next; + +SLJIT_UNUSED_ARG(allocator_data); + +while (current != NULL) + { + next = *(void**)current; + SLJIT_FREE(current, allocator_data); + current = next; + } +} + static SLJIT_INLINE void reset_ovector(compiler_common *common, int length) { DEFINE_COMPILER; @@ -2062,23 +2384,35 @@ int i; /* At this point we can freely use all temporary registers. */ SLJIT_ASSERT(length > 1); /* TMP1 returns with begin - 1. */ -OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1)); +OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1)); if (length < 8) { for (i = 1; i < length; i++) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0); } else { - GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START); - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1); + GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START); + OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1); loop = LABEL(); - OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1); - JUMPTO(SLJIT_C_NOT_ZERO, loop); + OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, loop); } } +static SLJIT_INLINE void reset_fast_fail(compiler_common *common) +{ +DEFINE_COMPILER; +sljit_s32 i; + +SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr); + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw)) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0); +} + static SLJIT_INLINE void do_reset_match(compiler_common *common, int length) { DEFINE_COMPILER; @@ -2088,11 +2422,11 @@ int i; SLJIT_ASSERT(length > 1); /* OVECTOR(1) contains the "string begin - 1" constant. */ if (length > 2) - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); if (length < 8) { for (i = 2; i < length; i++) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0); } else { @@ -2101,16 +2435,16 @@ else loop = LABEL(); OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0); OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1); - JUMPTO(SLJIT_C_NOT_ZERO, loop); + JUMPTO(SLJIT_NOT_ZERO, loop); } OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0); if (common->mark_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0); if (common->control_head_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack)); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base)); } @@ -2132,6 +2466,7 @@ while (current != NULL) SLJIT_ASSERT_STOP(); break; } + SLJIT_ASSERT(current > (sljit_sw*)current[-1]); current = (sljit_sw*)current[-1]; } return -1; @@ -2144,44 +2479,44 @@ struct sljit_label *loop; struct sljit_jump *early_quit; /* At this point we can freely use all registers. */ -OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0); +OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0); -OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0); +OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); if (common->mark_ptr != 0) - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); -OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count)); + OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); +OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count)); if (common->mark_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0); -OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int)); -OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin)); -GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0); +OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int)); +OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin)); +GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START); /* Unlikely, but possible */ -early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0); +early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0); loop = LABEL(); -OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0); -OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw)); +OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); +OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw)); /* Copy the integer value to the output buffer */ #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT); +OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT); #endif -OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0); -OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1); -JUMPTO(SLJIT_C_NOT_ZERO, loop); +OP1(SLJIT_MOVU_S32, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0); +OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); +JUMPTO(SLJIT_NOT_ZERO, loop); JUMPHERE(early_quit); /* Calculate the return value, which is the maximum ovector value. */ if (topbracket > 1) { - GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1); + GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1); - /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */ + /* OVECTOR(0) is never equal to SLJIT_S2. */ loop = LABEL(); - OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw))); - OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1); - CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop); - OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0); + OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))); + OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); + CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop); + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0); } else OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); @@ -2192,39 +2527,39 @@ static SLJIT_INLINE void return_with_partial_match(compiler_common *common, stru DEFINE_COMPILER; struct sljit_jump *jump; -SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2); +SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2); SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0)); -OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0); +OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL); -OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count)); -CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit); +OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count)); +CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit); /* Store match begin and end. */ -OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin)); -OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets)); +OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin)); +OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets)); -jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3); -OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0); +jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3); +OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0); #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT); +OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT); #endif -OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0); +OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0); JUMPHERE(jump); -OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start); -OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0); +OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start); +OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0); #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT); +OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT); #endif -OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0); +OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0); -OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0); +OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0); #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 -OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT); +OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT); #endif -OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0); +OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0); JUMPTO(SLJIT_JUMP, quit); } @@ -2238,22 +2573,22 @@ struct sljit_jump *jump; if (common->mode == JIT_PARTIAL_SOFT_COMPILE) { /* The value of -1 must be kept for start_used_ptr! */ - OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1); + OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1); /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */ - jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); JUMPHERE(jump); } else if (common->mode == JIT_PARTIAL_HARD_COMPILE) { - jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); JUMPHERE(jump); } } -static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc) +static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc) { /* Detects if the character has an othercase. */ unsigned int c; @@ -2296,7 +2631,7 @@ if (common->utf && c > 127) return TABLE_GET(c, common->fcc, c); } -static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc) +static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc) { /* Detects if the character and its othercase has only 1 bit difference. */ unsigned int c, oc, bit; @@ -2384,12 +2719,12 @@ if (common->mode == JIT_COMPILE) return; if (!force) - jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); else if (common->mode == JIT_PARTIAL_SOFT_COMPILE) - jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1); + jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); if (common->mode == JIT_PARTIAL_SOFT_COMPILE) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); else { if (common->partialmatchlabel != NULL) @@ -2410,20 +2745,20 @@ struct sljit_jump *jump; if (common->mode == JIT_COMPILE) { - add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); return; } -jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); +jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); if (common->mode == JIT_PARTIAL_SOFT_COMPILE) { - add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0); + add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); add_jump(compiler, end_reached, JUMP(SLJIT_JUMP)); } else { - add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0)); + add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); if (common->partialmatchlabel != NULL) JUMPTO(SLJIT_JUMP, common->partialmatchlabel); else @@ -2439,16 +2774,16 @@ struct sljit_jump *jump; if (common->mode == JIT_COMPILE) { - add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); return; } /* Partial matching mode. */ -jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); -add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0)); +jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); +add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); if (common->mode == JIT_PARTIAL_SOFT_COMPILE) { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); } else @@ -2461,107 +2796,290 @@ else JUMPHERE(jump); } -static void read_char(compiler_common *common) +static void peek_char(compiler_common *common, sljit_u32 max) { -/* Reads the character into TMP1, updates STR_PTR. +/* Reads the character into TMP1, keeps STR_PTR. Does not check STR_END. TMP2 Destroyed. */ DEFINE_COMPILER; #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 struct sljit_jump *jump; #endif +SLJIT_UNUSED_ARG(max); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 if (common->utf) { -#if defined COMPILE_PCRE8 - jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); -#elif defined COMPILE_PCRE16 - jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); -#endif /* COMPILE_PCRE[8|16] */ + if (max < 128) return; + + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); JUMPHERE(jump); } #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ + +#if defined SUPPORT_UTF && defined COMPILE_PCRE16 +if (common->utf) + { + if (max < 0xd800) return; + + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); + /* TMP2 contains the high surrogate. */ + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + JUMPHERE(jump); + } +#endif +} + +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 + +static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass) +{ +/* Tells whether the character codes below 128 are enough +to determine a match. */ +const sljit_u8 value = nclass ? 0xff : 0; +const sljit_u8 *end = bitset + 32; + +bitset += 16; +do + { + if (*bitset++ != value) + return FALSE; + } +while (bitset < end); +return TRUE; +} + +static void read_char7_type(compiler_common *common, BOOL full_read) +{ +/* Reads the precise character type of a character into TMP1, if the character +is less than 128. Otherwise it returns with zero. Does not check STR_END. The +full_read argument tells whether characters above max are accepted or not. */ +DEFINE_COMPILER; +struct sljit_jump *jump; + +SLJIT_ASSERT(common->utf); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + +if (full_read) + { + jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + JUMPHERE(jump); + } } -static void peek_char(compiler_common *common) +#endif /* SUPPORT_UTF && COMPILE_PCRE8 */ + +static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr) { -/* Reads the character into TMP1, keeps STR_PTR. -Does not check STR_END. TMP2 Destroyed. */ +/* Reads the precise value of a character into TMP1, if the character is +between min and max (c >= min && c <= max). Otherwise it returns with a value +outside the range. Does not check STR_END. */ DEFINE_COMPILER; #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 struct sljit_jump *jump; #endif +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 +struct sljit_jump *jump2; +#endif -OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); -#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 +SLJIT_UNUSED_ARG(update_str_ptr); +SLJIT_UNUSED_ARG(min); +SLJIT_UNUSED_ARG(max); +SLJIT_ASSERT(min <= max); + +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 if (common->utf) { -#if defined COMPILE_PCRE8 - jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); -#elif defined COMPILE_PCRE16 - jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); -#endif /* COMPILE_PCRE[8|16] */ - add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + if (max < 128 && !update_str_ptr) return; + + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + if (min >= 0x10000) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0); + if (update_str_ptr) + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); + if (!update_str_ptr) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + JUMPHERE(jump2); + if (update_str_ptr) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); + } + else if (min >= 0x800 && max <= 0xffff) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0); + if (update_str_ptr) + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + if (!update_str_ptr) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + JUMPHERE(jump2); + if (update_str_ptr) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); + } + else if (max >= 0x800) + add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); + else if (max < 128) + { + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + } + else + { + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (!update_str_ptr) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + else + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + if (update_str_ptr) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); + } JUMPHERE(jump); } -#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ +#endif + +#if defined SUPPORT_UTF && defined COMPILE_PCRE16 +if (common->utf) + { + if (max >= 0x10000) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); + /* TMP2 contains the high surrogate. */ + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + JUMPHERE(jump); + return; + } + + if (max < 0xd800 && !update_str_ptr) return; + + /* Skip low surrogate if necessary. */ + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); + if (update_str_ptr) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + if (max >= 0xd800) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000); + JUMPHERE(jump); + } +#endif } -static void read_char8_type(compiler_common *common) +static SLJIT_INLINE void read_char(compiler_common *common) +{ +read_char_range(common, 0, READ_CHAR_MAX, TRUE); +} + +static void read_char8_type(compiler_common *common, BOOL update_str_ptr) { /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */ DEFINE_COMPILER; -#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 struct sljit_jump *jump; #endif +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 +struct sljit_jump *jump2; +#endif -#ifdef SUPPORT_UTF +SLJIT_UNUSED_ARG(update_str_ptr); + +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 if (common->utf) { - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#if defined COMPILE_PCRE8 /* This can be an extra read in some situations, but hopefully it is needed in most cases. */ - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); - jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0); - add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL)); - JUMPHERE(jump); -#elif defined COMPILE_PCRE16 - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); - jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); - JUMPHERE(jump); - /* Skip low surrogate if necessary. */ - OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); -#elif defined COMPILE_PCRE32 - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); - jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0); + if (!update_str_ptr) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); + OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); + jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + JUMPHERE(jump2); + } + else + add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL)); JUMPHERE(jump); -#endif /* COMPILE_PCRE[8|16|32] */ return; } -#endif /* SUPPORT_UTF */ -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 +#endif /* SUPPORT_UTF && COMPILE_PCRE8 */ + +#if !defined COMPILE_PCRE8 /* The ctypes array contains only 256 values. */ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); -jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); +jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255); #endif -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); -#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); +#if !defined COMPILE_PCRE8 JUMPHERE(jump); #endif + +#if defined SUPPORT_UTF && defined COMPILE_PCRE16 +if (common->utf && update_str_ptr) + { + /* Skip low surrogate if necessary. */ + OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800); + jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPHERE(jump); + } +#endif /* SUPPORT_UTF && COMPILE_PCRE16 */ } static void skip_char_back(compiler_common *common) @@ -2578,7 +3096,7 @@ if (common->utf) OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); - CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label); return; } #elif defined COMPILE_PCRE16 @@ -2589,7 +3107,7 @@ if (common->utf) /* Skip low surrogate if necessary. */ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00); - OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); return; @@ -2599,28 +3117,35 @@ if (common->utf) OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } -static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue) +static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch) { /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */ DEFINE_COMPILER; +struct sljit_jump *jump; if (nltype == NLTYPE_ANY) { add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO)); } else if (nltype == NLTYPE_ANYCRLF) { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + if (jumpifmatch) + { + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR)); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); + } + else + { + jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); + JUMPHERE(jump); + } } else { SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256); - add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); + add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); } } @@ -2630,58 +3155,84 @@ else static void do_utfreadchar(compiler_common *common) { /* Fast decoding a UTF-8 character. TMP1 contains the first byte -of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */ +of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */ DEFINE_COMPILER; struct sljit_jump *jump; sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); -/* Searching for the first zero. */ -OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); -jump = JUMP(SLJIT_C_NOT_ZERO); -/* Two byte sequence. */ -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); + +/* Searching for the first zero. */ +OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); +jump = JUMP(SLJIT_NOT_ZERO); +/* Two byte sequence. */ +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2)); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -JUMPHERE(jump); -OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10); -jump = JUMP(SLJIT_C_NOT_ZERO); -/* Three byte sequence. */ +JUMPHERE(jump); OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f); -OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12); +OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); + +OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000); +jump = JUMP(SLJIT_NOT_ZERO); +/* Three byte sequence. */ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2)); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3)); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -JUMPHERE(jump); /* Four byte sequence. */ -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07); -OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18); +JUMPHERE(jump); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); +OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4)); +sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +} + +static void do_utfreadchar16(compiler_common *common) +{ +/* Fast decoding a UTF-8 character. TMP1 contains the first byte +of the character (>= 0xc0). Return value in TMP1. */ +DEFINE_COMPILER; +struct sljit_jump *jump; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3)); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); + +/* Searching for the first zero. */ +OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); +jump = JUMP(SLJIT_NOT_ZERO); +/* Two byte sequence. */ +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +sljit_emit_fast_return(compiler, RETURN_ADDR, 0); + +JUMPHERE(jump); +OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO); +/* This code runs only in 8 bit mode. No need to shift the value. */ +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); +OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3)); +/* Three byte sequence. */ +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } @@ -2696,58 +3247,32 @@ struct sljit_jump *compare; sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20); -jump = JUMP(SLJIT_C_NOT_ZERO); +jump = JUMP(SLJIT_NOT_ZERO); /* Two byte sequence. */ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f); +/* The upper 5 bits are known at this point. */ +compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); -compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); JUMPHERE(compare); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -JUMPHERE(jump); /* We only have types for characters less than 256. */ -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -} - -#elif defined COMPILE_PCRE16 - -static void do_utfreadchar(compiler_common *common) -{ -/* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char -of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */ -DEFINE_COMPILER; -struct sljit_jump *jump; - -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); -jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00); -/* Do nothing, only return. */ -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); - JUMPHERE(jump); -/* Combine two 16 bit characters. */ -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); -OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff); -OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); -OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); +OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } -#endif /* COMPILE_PCRE[8|16] */ +#endif /* COMPILE_PCRE8 */ #endif /* SUPPORT_UTF */ @@ -2767,26 +3292,26 @@ SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8); sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); +OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); -OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); +OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } #endif -static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline) +static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf) { DEFINE_COMPILER; struct sljit_label *mainloop; struct sljit_label *newlinelabel = NULL; struct sljit_jump *start; struct sljit_jump *end = NULL; -struct sljit_jump *nl = NULL; +struct sljit_jump *end2 = NULL; #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 struct sljit_jump *singlechar; #endif @@ -2794,39 +3319,38 @@ jump_list *newline = NULL; BOOL newlinecheck = FALSE; BOOL readuchar = FALSE; -if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY || - common->nltype == NLTYPE_ANYCRLF || common->newline > 255)) +if (!(hascrorlf || (common->match_end_ptr != 0)) && + (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255)) newlinecheck = TRUE; -if (firstline) +if (common->match_end_ptr != 0) { /* Search for the end of the first line. */ - SLJIT_ASSERT(common->first_line_end != 0); OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { mainloop = LABEL(); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop); - CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop); + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop); JUMPHERE(end); - OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } else { - end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); mainloop = LABEL(); /* Continual stores does not cause data dependency. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0); - read_char(common); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0); + read_char_range(common, common->nlmin, common->nlmax, TRUE); check_newlinechar(common, common->nltype, &newline, TRUE); - CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop); + CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop); JUMPHERE(end); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0); set_jumps(newline, LABEL()); } @@ -2839,15 +3363,15 @@ if (newlinecheck) { newlinelabel = LABEL(); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff); - OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); #endif OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - nl = JUMP(SLJIT_JUMP); + end2 = JUMP(SLJIT_JUMP); } mainloop = LABEL(); @@ -2862,25 +3386,25 @@ if (readuchar) OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); if (newlinecheck) - CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); #if defined SUPPORT_UTF && !defined COMPILE_PCRE32 #if defined COMPILE_PCRE8 if (common->utf) { - singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); JUMPHERE(singlechar); } #elif defined COMPILE_PCRE16 if (common->utf) { - singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); + singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); - OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); JUMPHERE(singlechar); @@ -2892,43 +3416,79 @@ JUMPHERE(start); if (newlinecheck) { JUMPHERE(end); - JUMPHERE(nl); + JUMPHERE(end2); } return mainloop; } -#define MAX_N_CHARS 3 +#define MAX_N_CHARS 16 +#define MAX_DIFF_CHARS 6 -static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline) +static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars) { -DEFINE_COMPILER; -struct sljit_label *start; -struct sljit_jump *quit; -pcre_uint32 chars[MAX_N_CHARS * 2]; -pcre_uchar *cc = common->start + 1 + LINK_SIZE; -int location = 0; -pcre_int32 len, c, bit, caseless; -int must_stop; - -/* We do not support alternatives now. */ -if (*(common->start + GET(common->start, 1)) == OP_ALT) - return FALSE; +pcre_uchar i, len; +len = chars[0]; +if (len == 255) + return; + +if (len == 0) + { + chars[0] = 1; + chars[1] = chr; + return; + } + +for (i = len; i > 0; i--) + if (chars[i] == chr) + return; + +if (len >= MAX_DIFF_CHARS - 1) + { + chars[0] = 255; + return; + } + +len++; +chars[len] = chr; +chars[0] = len; +} + +static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count) +{ +/* Recursive function, which scans prefix literals. */ +BOOL last, any, class, caseless; +int len, repeat, len_save, consumed = 0; +sljit_u32 chr; /* Any unicode character. */ +sljit_u8 *bytes, *bytes_end, byte; +pcre_uchar *alternative, *cc_save, *oc; +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 +pcre_uchar othercase[8]; +#elif defined SUPPORT_UTF && defined COMPILE_PCRE16 +pcre_uchar othercase[2]; +#else +pcre_uchar othercase[1]; +#endif + +repeat = 1; while (TRUE) { - caseless = 0; - must_stop = 1; - switch(*cc) - { - case OP_CHAR: - must_stop = 0; - cc++; - break; + if (*rec_count == 0) + return 0; + (*rec_count)--; + + last = TRUE; + any = FALSE; + class = FALSE; + caseless = FALSE; + switch (*cc) + { case OP_CHARI: - caseless = 1; - must_stop = 0; + caseless = TRUE; + case OP_CHAR: + last = FALSE; cc++; break; @@ -2947,184 +3507,1045 @@ while (TRUE) cc++; continue; + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + cc = bracketend(cc); + continue; + + case OP_PLUSI: + case OP_MINPLUSI: + case OP_POSPLUSI: + caseless = TRUE; case OP_PLUS: case OP_MINPLUS: case OP_POSPLUS: cc++; break; + case OP_EXACTI: + caseless = TRUE; case OP_EXACT: + repeat = GET2(cc, 1); + last = FALSE; cc += 1 + IMM2_SIZE; break; - case OP_PLUSI: - case OP_MINPLUSI: - case OP_POSPLUSI: - caseless = 1; + case OP_QUERYI: + case OP_MINQUERYI: + case OP_POSQUERYI: + caseless = TRUE; + case OP_QUERY: + case OP_MINQUERY: + case OP_POSQUERY: + len = 1; cc++; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc); +#endif + max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count); + if (max_chars == 0) + return consumed; + last = FALSE; break; - case OP_EXACTI: - caseless = 1; + case OP_KET: + cc += 1 + LINK_SIZE; + continue; + + case OP_ALT: + cc += GET(cc, 1); + continue; + + case OP_ONCE: + case OP_ONCE_NC: + case OP_BRA: + case OP_BRAPOS: + case OP_CBRA: + case OP_CBRAPOS: + alternative = cc + GET(cc, 1); + while (*alternative == OP_ALT) + { + max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count); + if (max_chars == 0) + return consumed; + alternative += GET(alternative, 1); + } + + if (*cc == OP_CBRA || *cc == OP_CBRAPOS) + cc += IMM2_SIZE; + cc += 1 + LINK_SIZE; + continue; + + case OP_CLASS: +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 + if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE)) + return consumed; +#endif + class = TRUE; + break; + + case OP_NCLASS: +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 + if (common->utf) return consumed; +#endif + class = TRUE; + break; + +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 + case OP_XCLASS: +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 + if (common->utf) return consumed; +#endif + any = TRUE; + cc += GET(cc, 1); + break; +#endif + + case OP_DIGIT: +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 + if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE)) + return consumed; +#endif + any = TRUE; + cc++; + break; + + case OP_WHITESPACE: +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 + if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE)) + return consumed; +#endif + any = TRUE; + cc++; + break; + + case OP_WORDCHAR: +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 + if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE)) + return consumed; +#endif + any = TRUE; + cc++; + break; + + case OP_NOT: + case OP_NOTI: + cc++; + /* Fall through. */ + case OP_NOT_DIGIT: + case OP_NOT_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_ANY: + case OP_ALLANY: +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 + if (common->utf) return consumed; +#endif + any = TRUE; + cc++; + break; + +#ifdef SUPPORT_UTF + case OP_NOTPROP: + case OP_PROP: +#ifndef COMPILE_PCRE32 + if (common->utf) return consumed; +#endif + any = TRUE; + cc += 1 + 2; + break; +#endif + + case OP_TYPEEXACT: + repeat = GET2(cc, 1); cc += 1 + IMM2_SIZE; + continue; + + case OP_NOTEXACT: + case OP_NOTEXACTI: +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 + if (common->utf) return consumed; +#endif + any = TRUE; + repeat = GET2(cc, 1); + cc += 1 + IMM2_SIZE + 1; break; default: - must_stop = 2; - break; + return consumed; + } + + if (any) + { + do + { + chars[0] = 255; + + consumed++; + if (--max_chars == 0) + return consumed; + chars += MAX_DIFF_CHARS; + } + while (--repeat > 0); + + repeat = 1; + continue; } - if (must_stop == 2) + if (class) + { + bytes = (sljit_u8*) (cc + 1); + cc += 1 + 32 / sizeof(pcre_uchar); + + switch (*cc) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPOSSTAR: + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSQUERY: + max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count); + if (max_chars == 0) + return consumed; + break; + + default: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRPOSPLUS: break; + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + repeat = GET2(cc, 1); + if (repeat <= 0) + return consumed; + break; + } + + do + { + if (bytes[31] & 0x80) + chars[0] = 255; + else if (chars[0] != 255) + { + bytes_end = bytes + 32; + chr = 0; + do + { + byte = *bytes++; + SLJIT_ASSERT((chr & 0x7) == 0); + if (byte == 0) + chr += 8; + else + { + do + { + if ((byte & 0x1) != 0) + add_prefix_char(chr, chars); + byte >>= 1; + chr++; + } + while (byte != 0); + chr = (chr + 7) & ~7; + } + } + while (chars[0] != 255 && bytes < bytes_end); + bytes = bytes_end - 32; + } + + consumed++; + if (--max_chars == 0) + return consumed; + chars += MAX_DIFF_CHARS; + } + while (--repeat > 0); + + switch (*cc) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPOSSTAR: + return consumed; + + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSQUERY: + cc++; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE)) + return consumed; + cc += 1 + 2 * IMM2_SIZE; + break; + } + + repeat = 1; + continue; + } + len = 1; #ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]); + if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc); #endif if (caseless && char_has_othercase(common, cc)) { - caseless = char_get_othercase_bit(common, cc); - if (caseless == 0) - return FALSE; -#ifdef COMPILE_PCRE8 - caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8)); -#else - if ((caseless & 0x100) != 0) - caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9)); +#ifdef SUPPORT_UTF + if (common->utf) + { + GETCHAR(chr, cc); + if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len) + return consumed; + } else - caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9)); #endif + { + chr = *cc; + othercase[0] = TABLE_GET(chr, common->fcc, chr); + } } else - caseless = 0; + { + caseless = FALSE; + othercase[0] = 0; /* Stops compiler warning - PH */ + } - while (len > 0 && location < MAX_N_CHARS * 2) + len_save = len; + cc_save = cc; + while (TRUE) { - c = *cc; - bit = 0; - if (len == (caseless & 0xff)) + oc = othercase; + do { - bit = caseless >> 8; - c |= bit; + chr = *cc; + add_prefix_char(*cc, chars); + + if (caseless) + add_prefix_char(*oc, chars); + + len--; + consumed++; + if (--max_chars == 0) + return consumed; + chars += MAX_DIFF_CHARS; + cc++; + oc++; } + while (len > 0); - chars[location] = c; - chars[location + 1] = bit; + if (--repeat == 0) + break; - len--; - location += 2; - cc++; + len = len_save; + cc = cc_save; } - if (location >= MAX_N_CHARS * 2 || must_stop != 0) - break; + repeat = 1; + if (last) + return consumed; } +} -/* At least two characters are required. */ -if (location < 2 * 2) - return FALSE; +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) -if (firstline) +static sljit_s32 character_to_int32(pcre_uchar chr) +{ +sljit_s32 value = (sljit_s32)chr; +#if defined COMPILE_PCRE8 +#define SSE2_COMPARE_TYPE_INDEX 0 +return (value << 24) | (value << 16) | (value << 8) | value; +#elif defined COMPILE_PCRE16 +#define SSE2_COMPARE_TYPE_INDEX 1 +return (value << 16) | value; +#elif defined COMPILE_PCRE32 +#define SSE2_COMPARE_TYPE_INDEX 2 +return value; +#else +#error "Unsupported unit width" +#endif +} + +static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2) +{ +DEFINE_COMPILER; +struct sljit_label *start; +struct sljit_jump *quit[3]; +struct sljit_jump *nomatch; +sljit_u8 instruction[8]; +sljit_s32 tmp1_ind = sljit_get_register_index(TMP1); +sljit_s32 tmp2_ind = sljit_get_register_index(TMP2); +sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR); +BOOL load_twice = FALSE; +pcre_uchar bit; + +bit = char1 ^ char2; +if (!is_powerof2(bit)) + bit = 0; + +if ((char1 != char2) && bit == 0) + load_twice = TRUE; + +quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +/* First part (unaligned start) */ + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); + +SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1); + +/* MOVD xmm, r/m32 */ +instruction[0] = 0x66; +instruction[1] = 0x0f; +instruction[2] = 0x6e; +instruction[3] = 0xc0 | (2 << 3) | tmp1_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +if (char1 != char2) { - SLJIT_ASSERT(common->first_line_end != 0); - OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); - OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); + + /* MOVD xmm, r/m32 */ + instruction[3] = 0xc0 | (3 << 3) | tmp1_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + +/* PSHUFD xmm1, xmm2/m128, imm8 */ +instruction[2] = 0x70; +instruction[3] = 0xc0 | (2 << 3) | 2; +instruction[4] = 0; +sljit_emit_op_custom(compiler, instruction, 5); + +if (char1 != char2) + { + /* PSHUFD xmm1, xmm2/m128, imm8 */ + instruction[3] = 0xc0 | (3 << 3) | 3; + instruction[4] = 0; + sljit_emit_op_custom(compiler, instruction, 5); + } + +OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf); +OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); + +/* MOVDQA xmm1, xmm2/m128 */ +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +if (str_ptr_ind < 8) + { + instruction[2] = 0x6f; + instruction[3] = (0 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 4); + + if (load_twice) + { + instruction[3] = (1 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } } else - OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); + { + instruction[1] = 0x41; + instruction[2] = 0x0f; + instruction[3] = 0x6f; + instruction[4] = (0 << 3) | (str_ptr_ind & 0x7); + sljit_emit_op_custom(compiler, instruction, 5); -start = LABEL(); -quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + if (load_twice) + { + instruction[4] = (1 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 5); + } + instruction[1] = 0x0f; + } -OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -if (chars[1] != 0) - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]); -CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start); -if (location > 2 * 2) - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -if (chars[3] != 0) - OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]); -CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start); -if (location > 2 * 2) - { - if (chars[5] != 0) - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]); - CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start); +#else + +instruction[2] = 0x6f; +instruction[3] = (0 << 3) | str_ptr_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +if (load_twice) + { + instruction[3] = (1 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 4); } -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -JUMPHERE(quit); +#endif -if (firstline) - OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +if (bit != 0) + { + /* POR xmm1, xmm2/m128 */ + instruction[2] = 0xeb; + instruction[3] = 0xc0 | (0 << 3) | 3; + sljit_emit_op_custom(compiler, instruction, 4); + } + +/* PCMPEQB/W/D xmm1, xmm2/m128 */ +instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; +instruction[3] = 0xc0 | (0 << 3) | 2; +sljit_emit_op_custom(compiler, instruction, 4); + +if (load_twice) + { + instruction[3] = 0xc0 | (1 << 3) | 3; + sljit_emit_op_custom(compiler, instruction, 4); + } + +/* PMOVMSKB reg, xmm */ +instruction[2] = 0xd7; +instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; +sljit_emit_op_custom(compiler, instruction, 4); + +if (load_twice) + { + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0); + instruction[3] = 0xc0 | (tmp2_ind << 3) | 1; + sljit_emit_op_custom(compiler, instruction, 4); + + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0); + } + +OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0); + +/* BSF r32, r/m32 */ +instruction[0] = 0x0f; +instruction[1] = 0xbc; +instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; +sljit_emit_op_custom(compiler, instruction, 3); + +nomatch = JUMP(SLJIT_ZERO); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +quit[1] = JUMP(SLJIT_JUMP); + +JUMPHERE(nomatch); + +start = LABEL(); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); +quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +/* Second part (aligned) */ + +instruction[0] = 0x66; +instruction[1] = 0x0f; + +/* MOVDQA xmm1, xmm2/m128 */ +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + +if (str_ptr_ind < 8) + { + instruction[2] = 0x6f; + instruction[3] = (0 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 4); + + if (load_twice) + { + instruction[3] = (1 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + } else - OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); -return TRUE; + { + instruction[1] = 0x41; + instruction[2] = 0x0f; + instruction[3] = 0x6f; + instruction[4] = (0 << 3) | (str_ptr_ind & 0x7); + sljit_emit_op_custom(compiler, instruction, 5); + + if (load_twice) + { + instruction[4] = (1 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 5); + } + instruction[1] = 0x0f; + } + +#else + +instruction[2] = 0x6f; +instruction[3] = (0 << 3) | str_ptr_ind; +sljit_emit_op_custom(compiler, instruction, 4); + +if (load_twice) + { + instruction[3] = (1 << 3) | str_ptr_ind; + sljit_emit_op_custom(compiler, instruction, 4); + } + +#endif + +if (bit != 0) + { + /* POR xmm1, xmm2/m128 */ + instruction[2] = 0xeb; + instruction[3] = 0xc0 | (0 << 3) | 3; + sljit_emit_op_custom(compiler, instruction, 4); + } + +/* PCMPEQB/W/D xmm1, xmm2/m128 */ +instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; +instruction[3] = 0xc0 | (0 << 3) | 2; +sljit_emit_op_custom(compiler, instruction, 4); + +if (load_twice) + { + instruction[3] = 0xc0 | (1 << 3) | 3; + sljit_emit_op_custom(compiler, instruction, 4); + } + +/* PMOVMSKB reg, xmm */ +instruction[2] = 0xd7; +instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; +sljit_emit_op_custom(compiler, instruction, 4); + +if (load_twice) + { + instruction[3] = 0xc0 | (tmp2_ind << 3) | 1; + sljit_emit_op_custom(compiler, instruction, 4); + + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); + } + +/* BSF r32, r/m32 */ +instruction[0] = 0x0f; +instruction[1] = 0xbc; +instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; +sljit_emit_op_custom(compiler, instruction, 3); + +JUMPTO(SLJIT_ZERO, start); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + +start = LABEL(); +SET_LABEL(quit[0], start); +SET_LABEL(quit[1], start); +SET_LABEL(quit[2], start); } -#undef MAX_N_CHARS +#undef SSE2_COMPARE_TYPE_INDEX -static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline) +#endif + +static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset) { DEFINE_COMPILER; struct sljit_label *start; struct sljit_jump *quit; struct sljit_jump *found; -pcre_uchar oc, bit; +pcre_uchar mask; +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 +struct sljit_label *utf_start = NULL; +struct sljit_jump *utf_quit = NULL; +#endif +BOOL has_match_end = (common->match_end_ptr != 0); -if (firstline) +if (offset > 0) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); + +if (has_match_end) { - SLJIT_ASSERT(common->first_line_end != 0); OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); + + OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1)); +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + if (sljit_x86_is_cmov_available()) + { + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0); + sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0); + } +#endif + { + quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0); + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + JUMPHERE(quit); + } } -start = LABEL(); -quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 +if (common->utf && offset > 0) + utf_start = LABEL(); +#endif -oc = first_char; -if (caseless) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + +/* SSE2 accelerated first character search. */ + +if (sljit_x86_is_sse2_available()) { - oc = TABLE_GET(first_char, common->fcc, first_char); -#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) - if (first_char > 127 && common->utf) - oc = UCD_OTHERCASE(first_char); + fast_forward_first_char2_sse2(common, char1, char2); + + SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0); + if (common->mode == JIT_COMPILE) + { + /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */ + SLJIT_ASSERT(common->forced_quit_label == NULL); + OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); + add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 + if (common->utf && offset > 0) + { + SLJIT_ASSERT(common->mode == JIT_COMPILE); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#if defined COMPILE_PCRE8 + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start); +#elif defined COMPILE_PCRE16 + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start); +#else +#error "Unknown code width" +#endif + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } #endif + + if (offset > 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); + } + else if (sljit_x86_is_cmov_available()) + { + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0); + } + else + { + quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_PTR, 0, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0); + JUMPHERE(quit); + } + + if (has_match_end) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + return; } -if (first_char == oc) - found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char); + +#endif + +quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +start = LABEL(); +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + +if (char1 == char2) + found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1); else { - bit = first_char ^ oc; - if (is_powerof2(bit)) + mask = char1 ^ char2; + if (is_powerof2(mask)) { - OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit); - found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask); + found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask); } else { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - found = JUMP(SLJIT_C_NOT_ZERO); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); + found = JUMP(SLJIT_NOT_ZERO); } } OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -JUMPTO(SLJIT_JUMP, start); +CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start); + +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 +if (common->utf && offset > 0) + utf_quit = JUMP(SLJIT_JUMP); +#endif + JUMPHERE(found); + +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 +if (common->utf && offset > 0) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#if defined COMPILE_PCRE8 + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start); +#elif defined COMPILE_PCRE16 + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start); +#else +#error "Unknown code width" +#endif + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPHERE(utf_quit); + } +#endif + JUMPHERE(quit); -if (firstline) +if (has_match_end) + { + quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + if (offset > 0) + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); + JUMPHERE(quit); OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + } + +if (offset > 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); } -static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline) +static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_label *start; +struct sljit_jump *quit; +struct sljit_jump *match; +/* bytes[0] represent the number of characters between 0 +and MAX_N_BYTES - 1, 255 represents any character. */ +pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS]; +sljit_s32 offset; +pcre_uchar mask; +pcre_uchar *char_set, *char_set_end; +int i, max, from; +int range_right = -1, range_len; +sljit_u8 *update_table = NULL; +BOOL in_range; +sljit_u32 rec_count; + +for (i = 0; i < MAX_N_CHARS; i++) + chars[i * MAX_DIFF_CHARS] = 0; + +rec_count = 10000; +max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count); + +if (max < 1) + return FALSE; + +in_range = FALSE; +/* Prevent compiler "uninitialized" warning */ +from = 0; +range_len = 4 /* minimum length */ - 1; +for (i = 0; i <= max; i++) + { + if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255)) + { + range_len = i - from; + range_right = i - 1; + } + + if (i < max && chars[i * MAX_DIFF_CHARS] < 255) + { + SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0); + if (!in_range) + { + in_range = TRUE; + from = i; + } + } + else + in_range = FALSE; + } + +if (range_right >= 0) + { + update_table = (sljit_u8 *)allocate_read_only_data(common, 256); + if (update_table == NULL) + return TRUE; + memset(update_table, IN_UCHARS(range_len), 256); + + for (i = 0; i < range_len; i++) + { + char_set = chars + ((range_right - i) * MAX_DIFF_CHARS); + SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255); + char_set_end = char_set + char_set[0]; + char_set++; + while (char_set <= char_set_end) + { + if (update_table[(*char_set) & 0xff] > IN_UCHARS(i)) + update_table[(*char_set) & 0xff] = IN_UCHARS(i); + char_set++; + } + } + } + +offset = -1; +/* Scan forward. */ +for (i = 0; i < max; i++) + { + if (offset == -1) + { + if (chars[i * MAX_DIFF_CHARS] <= 2) + offset = i; + } + else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2) + { + if (chars[i * MAX_DIFF_CHARS] == 1) + offset = i; + else + { + mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2]; + if (!is_powerof2(mask)) + { + mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2]; + if (is_powerof2(mask)) + offset = i; + } + } + } + } + +if (range_right < 0) + { + if (offset < 0) + return FALSE; + SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2); + /* Works regardless the value is 1 or 2. */ + mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]]; + fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset); + return TRUE; + } + +if (range_right == offset) + offset = -1; + +SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2)); + +max -= 1; +SLJIT_ASSERT(max > 0); +if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); + quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0); + OP1(SLJIT_MOV, STR_END, 0, TMP1, 0); + JUMPHERE(quit); + } +else + OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); + +SLJIT_ASSERT(range_right >= 0); + +#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table); +#endif + +start = LABEL(); +quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + +#if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right)); +#else +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1); +#endif + +#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0); +#else +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table); +#endif +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start); + +if (offset >= 0) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + if (chars[offset * MAX_DIFF_CHARS] == 1) + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start); + else + { + mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2]; + if (is_powerof2(mask)) + { + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start); + } + else + { + match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start); + JUMPHERE(match); + } + } + } + +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 +if (common->utf && offset != 0) + { + if (offset < 0) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } + else + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); +#if defined COMPILE_PCRE8 + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start); +#elif defined COMPILE_PCRE16 + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start); +#else +#error "Unknown code width" +#endif + if (offset < 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } +#endif + +if (offset >= 0) + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + +JUMPHERE(quit); + +if (common->match_end_ptr != 0) + { + if (range_right >= 0) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); + if (range_right >= 0) + { + quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0); + OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); + JUMPHERE(quit); + } + } +else + OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); +return TRUE; +} + +#undef MAX_N_CHARS +#undef MAX_DIFF_CHARS + +static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless) +{ +pcre_uchar oc; + +oc = first_char; +if (caseless) + { + oc = TABLE_GET(first_char, common->fcc, first_char); +#if defined SUPPORT_UCP && !defined COMPILE_PCRE8 + if (first_char > 127 && common->utf) + oc = UCD_OTHERCASE(first_char); +#endif + } + +fast_forward_first_char2(common, first_char, oc, 0); +} + +static SLJIT_INLINE void fast_forward_newline(compiler_common *common) { DEFINE_COMPILER; struct sljit_label *loop; @@ -3135,24 +4556,23 @@ struct sljit_jump *foundcr = NULL; struct sljit_jump *notfoundnl; jump_list *newline = NULL; -if (firstline) +if (common->match_end_ptr != 0) { - SLJIT_ASSERT(common->first_line_end != 0); OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); } if (common->nltype == NLTYPE_FIXED && common->newline > 255) { - lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2)); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL); #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); #endif @@ -3160,31 +4580,33 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255) loop = LABEL(); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); - CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop); - CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop); + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop); JUMPHERE(quit); JUMPHERE(firstchar); JUMPHERE(lastchar); - if (firstline) - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); + if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); return; } OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); -firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0); +firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); skip_char_back(common); loop = LABEL(); -read_char(common); -lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +common->ff_newline_shortcut = loop; + +read_char_range(common, common->nlmin, common->nlmax, TRUE); +lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) - foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); check_newlinechar(common, common->nltype, &newline, FALSE); set_jumps(newline, loop); @@ -3192,10 +4614,10 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) { quit = JUMP(SLJIT_JUMP); JUMPHERE(foundcr); - notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); - OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); #endif @@ -3206,56 +4628,50 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) JUMPHERE(lastchar); JUMPHERE(firstchar); -if (firstline) +if (common->match_end_ptr != 0) OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); } -static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks); +static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks); -static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline) +static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits) { DEFINE_COMPILER; struct sljit_label *start; struct sljit_jump *quit; struct sljit_jump *found = NULL; jump_list *matches = NULL; -pcre_uint8 inverted_start_bits[32]; -int i; #ifndef COMPILE_PCRE8 struct sljit_jump *jump; #endif -for (i = 0; i < 32; ++i) - inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]); - -if (firstline) +if (common->match_end_ptr != 0) { - SLJIT_ASSERT(common->first_line_end != 0); OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0); - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); } start = LABEL(); -quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); #ifdef SUPPORT_UTF if (common->utf) OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); #endif -if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches)) +if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches)) { #ifndef COMPILE_PCRE8 - jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255); + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255); JUMPHERE(jump); #endif OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); - found = JUMP(SLJIT_C_NOT_ZERO); + found = JUMP(SLJIT_NOT_ZERO); } #ifdef SUPPORT_UTF @@ -3267,17 +4683,17 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); #if defined COMPILE_PCRE8 if (common->utf) { - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); } #elif defined COMPILE_PCRE16 if (common->utf) { - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start); + CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); - OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); } @@ -3290,7 +4706,7 @@ if (matches != NULL) set_jumps(matches, LABEL()); JUMPHERE(quit); -if (firstline) +if (common->match_end_ptr != 0) OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0); } @@ -3303,13 +4719,13 @@ struct sljit_jump *alreadyfound; struct sljit_jump *found; struct sljit_jump *foundoc = NULL; struct sljit_jump *notfound; -pcre_uint32 oc, bit; +sljit_u32 oc, bit; SLJIT_ASSERT(common->req_char_ptr != 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr); OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX); -toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0); -alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0); +toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0); +alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); if (has_firstchar) OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); @@ -3317,7 +4733,7 @@ else OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0); loop = LABEL(); -notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0); +notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0); OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0); oc = req_char; @@ -3330,19 +4746,19 @@ if (caseless) #endif } if (req_char == oc) - found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char); + found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); else { bit = req_char ^ oc; if (is_powerof2(bit)) { OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit); - found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit); + found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit); } else { - found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char); - foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc); + found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); + foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc); } } OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); @@ -3351,7 +4767,7 @@ JUMPTO(SLJIT_JUMP, loop); JUMPHERE(found); if (foundoc) JUMPHERE(foundoc); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0); JUMPHERE(alreadyfound); JUMPHERE(toolong); return notfound; @@ -3371,7 +4787,7 @@ GET_LOCAL_BASE(TMP3, 0, 0); mainloop = LABEL(); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0); OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0); -jump = JUMP(SLJIT_C_SIG_LESS_EQUAL); +jump = JUMP(SLJIT_SIG_LESS_EQUAL); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0); OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw)); @@ -3380,7 +4796,7 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); JUMPTO(SLJIT_JUMP, mainloop); JUMPHERE(jump); -jump = JUMP(SLJIT_C_SIG_LESS); +jump = JUMP(SLJIT_SIG_LESS); /* End of dropping frames. */ sljit_emit_fast_return(compiler, RETURN_ADDR, 0); @@ -3403,12 +4819,12 @@ struct sljit_jump *jump; SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16); -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); +sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); /* Get type of the previous char, and put it to LOCALS1. */ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0); -skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0); +skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0); skip_char_back(common); check_start_used_ptr(common); read_char(common); @@ -3418,32 +4834,32 @@ read_char(common); if (common->use_ucp) { OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); - jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); + jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); JUMPHERE(jump); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0); } else #endif { #ifndef COMPILE_PCRE8 - jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); #elif defined SUPPORT_UTF /* Here LOCALS1 has already been zeroed. */ jump = NULL; if (common->utf) - jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); #endif /* COMPILE_PCRE8 */ - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0); #ifndef COMPILE_PCRE8 JUMPHERE(jump); #elif defined SUPPORT_UTF @@ -3455,21 +4871,21 @@ JUMPHERE(skipread); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); check_str_end(common, &skipread_list); -peek_char(common); +peek_char(common, READ_CHAR_MAX); /* Testing char type. This is a code duplication. */ #ifdef SUPPORT_UCP if (common->use_ucp) { OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); - jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); + jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); JUMPHERE(jump); } else @@ -3478,14 +4894,14 @@ else #ifndef COMPILE_PCRE8 /* TMP2 may be destroyed by peek_char. */ OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); - jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); #elif defined SUPPORT_UTF OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); jump = NULL; if (common->utf) - jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); #endif - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes); OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); #ifndef COMPILE_PCRE8 @@ -3497,114 +4913,20 @@ else } set_jumps(skipread_list, LABEL()); -OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); -sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); +OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); +sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); } -/* - range format: - - ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range). - ranges[1] = first bit (0 or 1) - ranges[2-length] = position of the bit change (when the current bit is not equal to the previous) -*/ - -static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch) +static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks) { +/* May destroy TMP1. */ DEFINE_COMPILER; -struct sljit_jump *jump; - -if (ranges[0] < 0) - return FALSE; - -switch(ranges[0]) - { - case 1: - if (readch) - read_char(common); - add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); - return TRUE; - - case 2: - if (readch) - read_char(common); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); - add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); - return TRUE; - - case 4: - if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5]) - { - if (readch) - read_char(common); - if (ranges[1] != 0) - { - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4])); - } - else - { - jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4])); - JUMPHERE(jump); - } - return TRUE; - } - if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2])) - { - if (readch) - read_char(common); - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]); - add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4])); - return TRUE; - } - return FALSE; - - default: - return FALSE; - } -} - -static void get_ctype_ranges(compiler_common *common, int flag, int *ranges) -{ -int i, bit, length; -const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes; - -bit = ctypes[0] & flag; -ranges[0] = -1; -ranges[1] = bit != 0 ? 1 : 0; -length = 0; - -for (i = 1; i < 256; i++) - if ((ctypes[i] & flag) != bit) - { - if (length >= MAX_RANGE_SIZE) - return; - ranges[2 + length] = i; - length++; - bit ^= flag; - } - -if (bit != 0) - { - if (length >= MAX_RANGE_SIZE) - return; - ranges[2 + length] = 256; - length++; - } -ranges[0] = length; -} - -static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks) -{ -int ranges[2 + MAX_RANGE_SIZE]; -pcre_uint8 bit, cbit, all; +int ranges[MAX_RANGE_SIZE]; +sljit_u8 bit, cbit, all; int i, byte, length = 0; bit = bits[0] & 0x1; -ranges[1] = bit; -/* Can be 0 or 255. */ +/* All bits will be zero or one (since bit is zero or one). */ all = -bit; for (i = 0; i < 256; ) @@ -3619,7 +4941,7 @@ for (i = 0; i < 256; ) { if (length >= MAX_RANGE_SIZE) return FALSE; - ranges[2 + length] = i; + ranges[length] = i; length++; bit = cbit; all = -cbit; @@ -3632,12 +4954,119 @@ if (((bit == 0) && nclass) || ((bit == 1) && !nclass)) { if (length >= MAX_RANGE_SIZE) return FALSE; - ranges[2 + length] = 256; + ranges[length] = 256; length++; } -ranges[0] = length; -return check_ranges(common, ranges, backtracks, FALSE); +if (length < 0 || length > 4) + return FALSE; + +bit = bits[0] & 0x1; +if (invert) bit ^= 0x1; + +/* No character is accepted. */ +if (length == 0 && bit == 0) + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + +switch(length) + { + case 0: + /* When bit != 0, all characters are accepted. */ + return TRUE; + + case 1: + add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); + return TRUE; + + case 2: + if (ranges[0] + 1 != ranges[1]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); + add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); + } + else + add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); + return TRUE; + + case 3: + if (bit != 0) + { + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); + if (ranges[0] + 1 != ranges[1]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); + return TRUE; + } + + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0])); + if (ranges[1] + 1 != ranges[2]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1])); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1])); + return TRUE; + + case 4: + if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2]) + && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2] + && (ranges[1] & (ranges[2] - ranges[0])) == 0 + && is_powerof2(ranges[2] - ranges[0])) + { + SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0); + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]); + if (ranges[2] + 1 != ranges[3]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]); + add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); + } + else + add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2])); + return TRUE; + } + + if (bit != 0) + { + i = 0; + if (ranges[0] + 1 != ranges[1]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); + i = ranges[0]; + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0])); + + if (ranges[2] + 1 != ranges[3]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2])); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i)); + return TRUE; + } + + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0])); + if (ranges[1] + 1 != ranges[2]) + { + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1])); + } + else + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0])); + return TRUE; + + default: + SLJIT_ASSERT_STOP(); + return FALSE; + } } static void check_anynewline(compiler_common *common) @@ -3649,21 +5078,21 @@ sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); -OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 #ifdef COMPILE_PCRE8 if (common->utf) { #endif - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); #ifdef COMPILE_PCRE8 } #endif #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */ -OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); +OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } @@ -3675,33 +5104,33 @@ DEFINE_COMPILER; sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09); -OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); -OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); +OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0); #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 #ifdef COMPILE_PCRE8 if (common->utf) { #endif - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000); #ifdef COMPILE_PCRE8 } #endif #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */ -OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); +OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } @@ -3715,21 +5144,21 @@ sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); -OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); +OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 #ifdef COMPILE_PCRE8 if (common->utf) { #endif - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); #ifdef COMPILE_PCRE8 } #endif #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */ -OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); +OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } @@ -3746,21 +5175,21 @@ struct sljit_label *label; sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); label = LABEL(); OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1)); OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); -jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0); +jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0); OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); -JUMPTO(SLJIT_C_NOT_ZERO, label); +JUMPTO(SLJIT_NOT_ZERO, label); JUMPHERE(jump); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0); -OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); +OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } @@ -3776,8 +5205,8 @@ sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0); OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); @@ -3786,26 +5215,26 @@ label = LABEL(); OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1)); OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); #ifndef COMPILE_PCRE8 -jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255); +jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255); #endif -OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0); +OP1(SLJIT_MOV_U8, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0); #ifndef COMPILE_PCRE8 JUMPHERE(jump); -jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255); +jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255); #endif -OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0); +OP1(SLJIT_MOV_U8, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0); #ifndef COMPILE_PCRE8 JUMPHERE(jump); #endif -jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0); +jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0); OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); -JUMPTO(SLJIT_C_NOT_ZERO, label); +JUMPTO(SLJIT_NOT_ZERO, label); JUMPHERE(jump); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0); -OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); -OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); +OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } @@ -3818,11 +5247,11 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0); static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1) { /* This function would be ineffective to do in JIT level. */ -pcre_uint32 c1, c2; +sljit_u32 c1, c2; const pcre_uchar *src2 = args->uchar_ptr; const pcre_uchar *end2 = args->end; const ucd_record *ur; -const pcre_uint32 *pp; +const sljit_u32 *pp; while (src1 < end1) { @@ -3847,7 +5276,7 @@ return src2; #endif /* SUPPORT_UTF && SUPPORT_UCP */ static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc, - compare_context* context, jump_list **backtracks) + compare_context *context, jump_list **backtracks) { DEFINE_COMPILER; unsigned int othercasebit = 0; @@ -3882,16 +5311,16 @@ if (context->sourcereg == -1) #if defined COMPILE_PCRE8 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED if (context->length >= 4) - OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); else if (context->length >= 2) - OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); else #endif - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); #elif defined COMPILE_PCRE16 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED if (context->length >= 4) - OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); else #endif OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); @@ -3933,12 +5362,12 @@ do #endif { if (context->length >= 4) - OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); else if (context->length >= 2) - OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); #if defined COMPILE_PCRE8 else if (context->length >= 1) - OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); #endif /* COMPILE_PCRE8 */ context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; @@ -3947,20 +5376,20 @@ do case 4 / sizeof(pcre_uchar): if (context->oc.asint != 0) OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); break; case 2 / sizeof(pcre_uchar): if (context->oc.asushort != 0) OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); break; #ifdef COMPILE_PCRE8 case 1: if (context->oc.asbyte != 0) OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); break; #endif @@ -3982,10 +5411,10 @@ do if (othercasebit != 0 && othercasechar == cc) { OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); } else - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); #endif @@ -4004,104 +5433,76 @@ return cc; #define SET_TYPE_OFFSET(value) \ if ((value) != typeoffset) \ { \ - if ((value) > typeoffset) \ - OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \ - else \ + if ((value) < typeoffset) \ OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \ + else \ + OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \ } \ typeoffset = (value); #define SET_CHAR_OFFSET(value) \ if ((value) != charoffset) \ { \ - if ((value) > charoffset) \ - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \ + if ((value) < charoffset) \ + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \ else \ - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \ + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \ } \ charoffset = (value); +static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr); + static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) { DEFINE_COMPILER; jump_list *found = NULL; -jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks; -pcre_int32 c, charoffset; -const pcre_uint32 *other_cases; +jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks; +sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX; struct sljit_jump *jump = NULL; pcre_uchar *ccbegin; int compares, invertcmp, numberofcmps; +#if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16) +BOOL utf = common->utf; +#endif + #ifdef SUPPORT_UCP BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; BOOL charsaved = FALSE; -int typereg = TMP1, scriptreg = TMP1; -pcre_int32 typeoffset; +int typereg = TMP1; +const sljit_u32 *other_cases; +sljit_uw typeoffset; #endif -/* Although SUPPORT_UTF must be defined, we are - not necessary in utf mode even in 8 bit mode. */ -detect_partial_match(common, backtracks); -read_char(common); - -if ((*cc++ & XCL_MAP) != 0) +/* Scanning the necessary info. */ +cc++; +ccbegin = cc; +compares = 0; +if (cc[-1] & XCL_MAP) { - OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); -#ifndef COMPILE_PCRE8 - jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); -#elif defined SUPPORT_UTF - if (common->utf) - jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); -#endif - - if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list)) - { - OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); - OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); - OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); - add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO)); - } - -#ifndef COMPILE_PCRE8 - JUMPHERE(jump); -#elif defined SUPPORT_UTF - if (common->utf) - JUMPHERE(jump); -#endif - OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); -#ifdef SUPPORT_UCP - charsaved = TRUE; -#endif + min = 0; cc += 32 / sizeof(pcre_uchar); } -/* Scanning the necessary info. */ -ccbegin = cc; -compares = 0; while (*cc != XCL_END) { compares++; if (*cc == XCL_SINGLE) { - cc += 2; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif + cc ++; + GETCHARINCTEST(c, cc); + if (c > max) max = c; + if (c < min) min = c; #ifdef SUPPORT_UCP needschar = TRUE; #endif } else if (*cc == XCL_RANGE) { - cc += 2; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif - cc++; -#ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); -#endif + cc ++; + GETCHARINCTEST(c, cc); + if (c < min) min = c; + GETCHARINCTEST(c, cc); + if (c > max) max = c; #ifdef SUPPORT_UCP needschar = TRUE; #endif @@ -4111,9 +5512,33 @@ while (*cc != XCL_END) { SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); cc++; + if (*cc == PT_CLIST) + { + other_cases = PRIV(ucd_caseless_sets) + cc[1]; + while (*other_cases != NOTACHAR) + { + if (*other_cases > max) max = *other_cases; + if (*other_cases < min) min = *other_cases; + other_cases++; + } + } + else + { + max = READ_CHAR_MAX; + min = 0; + } + switch(*cc) { case PT_ANY: + /* Any either accepts everything or ignored. */ + if (cc[-1] == XCL_PROP) + { + compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); + if (list == backtracks) + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + return; + } break; case PT_LAMP: @@ -4130,6 +5555,9 @@ while (*cc != XCL_END) case PT_SPACE: case PT_PXSPACE: case PT_WORD: + case PT_PXGRAPH: + case PT_PXPRINT: + case PT_PXPUNCT: needstype = TRUE; needschar = TRUE; break; @@ -4147,49 +5575,147 @@ while (*cc != XCL_END) } #endif } +SLJIT_ASSERT(compares > 0); + +/* We are not necessary in utf mode even in 8 bit mode. */ +cc = ccbegin; +read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0); + +if ((cc[-1] & XCL_HASPROP) == 0) + { + if ((cc[-1] & XCL_MAP) != 0) + { + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); + if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found)) + { + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO)); + } + + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump); + + cc += 32 / sizeof(pcre_uchar); + } + else + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min); + add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min)); + } + } +else if ((cc[-1] & XCL_MAP) != 0) + { + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); +#ifdef SUPPORT_UCP + charsaved = TRUE; +#endif + if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list)) + { +#ifdef COMPILE_PCRE8 + jump = NULL; + if (common->utf) +#endif + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); + + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); + OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO)); + +#ifdef COMPILE_PCRE8 + if (common->utf) +#endif + JUMPHERE(jump); + } + + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + cc += 32 / sizeof(pcre_uchar); + } #ifdef SUPPORT_UCP -/* Simple register allocation. TMP1 is preferred if possible. */ if (needstype || needsscript) { if (needschar && !charsaved) - OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); - add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); - if (needschar) + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); + + OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); + OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); + + /* Before anything else, we deal with scripts. */ + if (needsscript) { - if (needstype) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); + + ccbegin = cc; + + while (*cc != XCL_END) { - OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); - typereg = RETURN_ADDR; + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + if (*cc == PT_SC) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; + jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]); + add_jump(compiler, compares > 0 ? list : backtracks, jump); + } + cc += 2; + } } - if (needsscript) - scriptreg = TMP3; - OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); + cc = ccbegin; } - else if (needstype && needsscript) - scriptreg = TMP3; - /* In all other cases only one of them was specified, and that can goes to TMP1. */ - if (needsscript) + if (needschar) + { + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + } + + if (needstype) { - if (scriptreg == TMP1) + if (!needschar) { - OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); } else { OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + typereg = RETURN_ADDR; } } } #endif /* Generating code. */ -cc = ccbegin; charoffset = 0; numberofcmps = 0; #ifdef SUPPORT_UCP @@ -4205,148 +5731,123 @@ while (*cc != XCL_END) if (*cc == XCL_SINGLE) { cc ++; -#ifdef SUPPORT_UTF - if (common->utf) - { - GETCHARINC(c, cc); - } - else -#endif - c = *cc++; + GETCHARINCTEST(c, cc); if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset); - OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL); numberofcmps++; } else if (numberofcmps > 0) { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); numberofcmps = 0; } else { - jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset); + jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); numberofcmps = 0; } } else if (*cc == XCL_RANGE) { cc ++; -#ifdef SUPPORT_UTF - if (common->utf) - { - GETCHARINC(c, cc); - } - else -#endif - c = *cc++; + GETCHARINCTEST(c, cc); SET_CHAR_OFFSET(c); -#ifdef SUPPORT_UTF - if (common->utf) - { - GETCHARINC(c, cc); - } - else -#endif - c = *cc++; + GETCHARINCTEST(c, cc); + if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) { - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset); - OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL); numberofcmps++; } else if (numberofcmps > 0) { - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); - jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); numberofcmps = 0; } else { - jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset); + jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); numberofcmps = 0; } } #ifdef SUPPORT_UCP else { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); if (*cc == XCL_NOTPROP) invertcmp ^= 0x1; cc++; switch(*cc) { case PT_ANY: - if (list != backtracks) - { - if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0)) - continue; - } - else if (cc[-1] == XCL_NOTPROP) - continue; - jump = JUMP(SLJIT_JUMP); + if (!invertcmp) + jump = JUMP(SLJIT_JUMP); break; case PT_LAMP: OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; case PT_GC: c = PRIV(ucp_typerange)[(int)cc[1] * 2]; SET_TYPE_OFFSET(c); - jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c); + jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c); break; case PT_PC: - jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset); + jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset); break; case PT_SC: - jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]); + compares++; + /* Do nothing. */ break; case PT_SPACE: case PT_PXSPACE: - if (*cc == PT_SPACE) - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); - jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset); - } SET_CHAR_OFFSET(9); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); - if (*cc == PT_SPACE) - JUMPHERE(jump); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL); SET_TYPE_OFFSET(ucp_Zl); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); - jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; case PT_WORD: - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); /* Fall through. */ case PT_ALNUM: SET_TYPE_OFFSET(ucp_Ll); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL); + OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL); SET_TYPE_OFFSET(ucp_Nd); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); - jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; case PT_CLIST: @@ -4368,7 +5869,7 @@ while (*cc != XCL_END) OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); } OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); other_cases += 2; } else if (is_powerof2(other_cases[2] ^ other_cases[1])) @@ -4381,42 +5882,107 @@ while (*cc != XCL_END) OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); } OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset); - OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset)); + OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL); other_cases += 3; } else { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); } while (*other_cases != NOTACHAR) { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset); - OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); + OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL); } - jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; case PT_UCNC: - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); SET_CHAR_OFFSET(0xa0); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset); - OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); SET_CHAR_OFFSET(0); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL); - jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_PXGRAPH: + /* C and Z groups are the farthest two groups. */ + SET_TYPE_OFFSET(ucp_Ll); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER); + + jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); + + /* In case of ucp_Cf, we overwrite the result. */ + SET_CHAR_OFFSET(0x2066); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); + + JUMPHERE(jump); + jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); + break; + + case PT_PXPRINT: + /* C and Z groups are the farthest two groups. */ + SET_TYPE_OFFSET(ucp_Ll); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER); + + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); + OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL); + + jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); + + /* In case of ucp_Cf, we overwrite the result. */ + SET_CHAR_OFFSET(0x2066); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); + + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL); + + JUMPHERE(jump); + jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); + break; + + case PT_PXPUNCT: + SET_TYPE_OFFSET(ucp_Sc); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f); + OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_TYPE_OFFSET(ucp_Pc); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + default: + SLJIT_ASSERT_STOP(); break; } cc += 2; @@ -4436,90 +6002,301 @@ if (found != NULL) #endif -static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks) +static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks) { DEFINE_COMPILER; int length; -unsigned int c, oc, bit; -compare_context context; struct sljit_jump *jump[4]; -jump_list *end_list; #ifdef SUPPORT_UTF struct sljit_label *label; -#ifdef SUPPORT_UCP -pcre_uchar propdata[5]; -#endif -#endif +#endif /* SUPPORT_UTF */ switch(type) { case OP_SOD: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); return cc; case OP_SOM: OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); return cc; case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY: add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO)); + return cc; + + case OP_EODN: + /* Requires rather complex checks. */ + jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (common->mode == JIT_COMPILE) + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); + else + { + jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); + OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL)); + check_partial(common, TRUE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump[1]); + } + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + } + else if (common->nltype == NLTYPE_FIXED) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); + } + else + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); + jump[2] = JUMP(SLJIT_GREATER); + add_jump(compiler, backtracks, JUMP(SLJIT_LESS)); + /* Equal. */ + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + + JUMPHERE(jump[1]); + if (common->nltype == NLTYPE_ANYCRLF) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); + } + else + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0); + read_char_range(common, common->nlmin, common->nlmax, TRUE); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); + } + JUMPHERE(jump[2]); + JUMPHERE(jump[3]); + } + JUMPHERE(jump[0]); + check_partial(common, FALSE); + return cc; + + case OP_EOD: + add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + check_partial(common, FALSE); + return cc; + + case OP_DOLL: + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + + if (!common->endonly) + compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks); + else + { + add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + check_partial(common, FALSE); + } + return cc; + + case OP_DOLLM: + jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + check_partial(common, FALSE); + jump[0] = JUMP(SLJIT_JUMP); + JUMPHERE(jump[1]); + + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (common->mode == JIT_COMPILE) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0)); + else + { + jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0); + /* STR_PTR = STR_END - IN_UCHARS(1) */ + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + check_partial(common, TRUE); + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + JUMPHERE(jump[1]); + } + + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + } + else + { + peek_char(common, common->nlmax); + check_newlinechar(common, common->nltype, backtracks, FALSE); + } + JUMPHERE(jump[0]); + return cc; + + case OP_CIRC: + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); return cc; + case OP_CIRCM: + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); + jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + jump[0] = JUMP(SLJIT_JUMP); + JUMPHERE(jump[1]); + + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + if (common->nltype == NLTYPE_FIXED && common->newline > 255) + { + OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); + } + else + { + skip_char_back(common); + read_char_range(common, common->nlmin, common->nlmax, TRUE); + check_newlinechar(common, common->nltype, backtracks, FALSE); + } + JUMPHERE(jump[0]); + return cc; + + case OP_REVERSE: + length = GET(cc, 0); + if (length == 0) + return cc + LINK_SIZE; + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +#ifdef SUPPORT_UTF + if (common->utf) + { + OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length); + label = LABEL(); + add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0)); + skip_char_back(common); + OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + else +#endif + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0)); + } + check_start_used_ptr(common); + return cc + LINK_SIZE; + } +SLJIT_ASSERT_STOP(); +return cc; +} + +static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr) +{ +DEFINE_COMPILER; +int length; +unsigned int c, oc, bit; +compare_context context; +struct sljit_jump *jump[3]; +jump_list *end_list; +#ifdef SUPPORT_UTF +struct sljit_label *label; +#ifdef SUPPORT_UCP +pcre_uchar propdata[5]; +#endif +#endif /* SUPPORT_UTF */ + +switch(type) + { case OP_NOT_DIGIT: case OP_DIGIT: /* Digits are usually 0-9, so it is worth to optimize them. */ - if (common->digits[0] == -2) - get_ctype_ranges(common, ctype_digit, common->digits); - detect_partial_match(common, backtracks); - /* Flip the starting bit in the negative case. */ - if (type == OP_NOT_DIGIT) - common->digits[1] ^= 1; - if (!check_ranges(common, common->digits, backtracks, TRUE)) - { - read_char8_type(common); - OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); - add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); - } - if (type == OP_NOT_DIGIT) - common->digits[1] ^= 1; + if (check_str_ptr) + detect_partial_match(common, backtracks); +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 + if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE)) + read_char7_type(common, type == OP_NOT_DIGIT); + else +#endif + read_char8_type(common, type == OP_NOT_DIGIT); + /* Flip the starting bit in the negative case. */ + OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); + add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO)); return cc; case OP_NOT_WHITESPACE: case OP_WHITESPACE: - detect_partial_match(common, backtracks); - read_char8_type(common); + if (check_str_ptr) + detect_partial_match(common, backtracks); +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 + if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE)) + read_char7_type(common, type == OP_NOT_WHITESPACE); + else +#endif + read_char8_type(common, type == OP_NOT_WHITESPACE); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space); - add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO)); return cc; case OP_NOT_WORDCHAR: case OP_WORDCHAR: - detect_partial_match(common, backtracks); - read_char8_type(common); + if (check_str_ptr) + detect_partial_match(common, backtracks); +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 + if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE)) + read_char7_type(common, type == OP_NOT_WORDCHAR); + else +#endif + read_char8_type(common, type == OP_NOT_WORDCHAR); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word); - add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO)); return cc; case OP_ANY: - detect_partial_match(common, backtracks); - read_char(common); + if (check_str_ptr) + detect_partial_match(common, backtracks); + read_char_range(common, common->nlmin, common->nlmax, TRUE); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { - jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); + jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); end_list = NULL; if (common->mode != JIT_PARTIAL_HARD_COMPILE) - add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); else check_str_end(common, &end_list); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); set_jumps(end_list, LABEL()); JUMPHERE(jump[0]); } @@ -4528,7 +6305,8 @@ switch(type) return cc; case OP_ALLANY: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); #ifdef SUPPORT_UTF if (common->utf) { @@ -4536,14 +6314,14 @@ switch(type) OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 #if defined COMPILE_PCRE8 - jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); #elif defined COMPILE_PCRE16 - jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); + jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); - OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); #endif @@ -4556,7 +6334,8 @@ switch(type) return cc; case OP_ANYBYTE: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); return cc; @@ -4564,28 +6343,31 @@ switch(type) #ifdef SUPPORT_UCP case OP_NOTPROP: case OP_PROP: - propdata[0] = 0; + propdata[0] = XCL_HASPROP; propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP; propdata[2] = cc[0]; propdata[3] = cc[1]; propdata[4] = XCL_END; + if (check_str_ptr) + detect_partial_match(common, backtracks); compile_xclass_matchingpath(common, propdata, backtracks); return cc + 2; #endif #endif case OP_ANYNL: - detect_partial_match(common, backtracks); - read_char(common); - jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + if (check_str_ptr) + detect_partial_match(common, backtracks); + read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE); + jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); /* We don't need to handle soft partial matching case. */ end_list = NULL; if (common->mode != JIT_PARTIAL_HARD_COMPILE) - add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); else check_str_end(common, &end_list); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); + jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); jump[2] = JUMP(SLJIT_JUMP); JUMPHERE(jump[0]); @@ -4597,52 +6379,55 @@ switch(type) case OP_NOT_HSPACE: case OP_HSPACE: - detect_partial_match(common, backtracks); - read_char(common); + if (check_str_ptr) + detect_partial_match(common, backtracks); + read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE); add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO)); return cc; case OP_NOT_VSPACE: case OP_VSPACE: - detect_partial_match(common, backtracks); - read_char(common); + if (check_str_ptr) + detect_partial_match(common, backtracks); + read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE); add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO)); return cc; #ifdef SUPPORT_UCP case OP_EXTUNI: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); read_char(common); add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop)); /* Optimize register allocation: use a real register. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); - OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); + OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3); label = LABEL(); - jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); read_char(common); add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop)); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3); OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2); - OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable)); + OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable)); OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); - JUMPTO(SLJIT_C_NOT_ZERO, label); + JUMPTO(SLJIT_NOT_ZERO, label); OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); JUMPHERE(jump[0]); - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); if (common->mode == JIT_PARTIAL_HARD_COMPILE) { - jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); + jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); /* Since we successfully read a char above, partial matching must occure. */ check_partial(common, TRUE); JUMPHERE(jump[0]); @@ -4650,176 +6435,17 @@ switch(type) return cc; #endif - case OP_EODN: - /* Requires rather complex checks. */ - jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - if (common->mode == JIT_COMPILE) - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0)); - else - { - jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL); - add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL)); - check_partial(common, TRUE); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - JUMPHERE(jump[1]); - } - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); - } - else if (common->nltype == NLTYPE_FIXED) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); - } - else - { - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); - jump[2] = JUMP(SLJIT_C_GREATER); - add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS)); - /* Equal. */ - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - - JUMPHERE(jump[1]); - if (common->nltype == NLTYPE_ANYCRLF) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); - } - else - { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0); - read_char(common); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); - add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO)); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); - } - JUMPHERE(jump[2]); - JUMPHERE(jump[3]); - } - JUMPHERE(jump[0]); - check_partial(common, FALSE); - return cc; - - case OP_EOD: - add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0)); - check_partial(common, FALSE); - return cc; - - case OP_CIRC: - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); - add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0)); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - return cc; - - case OP_CIRCM: - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); - jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - jump[0] = JUMP(SLJIT_JUMP); - JUMPHERE(jump[1]); - - add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); - } - else - { - skip_char_back(common); - read_char(common); - check_newlinechar(common, common->nltype, backtracks, FALSE); - } - JUMPHERE(jump[0]); - return cc; - - case OP_DOLL: - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - - if (!common->endonly) - compile_char1_matchingpath(common, OP_EODN, cc, backtracks); - else - { - add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0)); - check_partial(common, FALSE); - } - return cc; - - case OP_DOLLM: - jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); - check_partial(common, FALSE); - jump[0] = JUMP(SLJIT_JUMP); - JUMPHERE(jump[1]); - - if (common->nltype == NLTYPE_FIXED && common->newline > 255) - { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - if (common->mode == JIT_COMPILE) - add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0)); - else - { - jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0); - /* STR_PTR = STR_END - IN_UCHARS(1) */ - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - check_partial(common, TRUE); - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - JUMPHERE(jump[1]); - } - - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); - } - else - { - peek_char(common); - check_newlinechar(common, common->nltype, backtracks, FALSE); - } - JUMPHERE(jump[0]); - return cc; - case OP_CHAR: case OP_CHARI: length = 1; #ifdef SUPPORT_UTF if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc); #endif - if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)) + if (common->mode == JIT_COMPILE && check_str_ptr + && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)) { OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); - add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); context.length = IN_UCHARS(length); context.sourcereg = -1; @@ -4828,8 +6454,9 @@ switch(type) #endif return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks); } - detect_partial_match(common, backtracks); - read_char(common); + + if (check_str_ptr) + detect_partial_match(common, backtracks); #ifdef SUPPORT_UTF if (common->utf) { @@ -4838,29 +6465,31 @@ switch(type) else #endif c = *cc; + if (type == OP_CHAR || !char_has_othercase(common, cc)) { - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + read_char_range(common, c, c, FALSE); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); return cc + length; } oc = char_othercase(common, c); + read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE); bit = c ^ oc; if (is_powerof2(bit)) { OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); return cc + length; } - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO)); + jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); + JUMPHERE(jump[0]); return cc + length; case OP_NOT: case OP_NOTI: - detect_partial_match(common, backtracks); + if (check_str_ptr) + detect_partial_match(common, backtracks); length = 1; #ifdef SUPPORT_UTF if (common->utf) @@ -4869,18 +6498,18 @@ switch(type) c = *cc; if (c < 128) { - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); if (type == OP_NOT || !char_has_othercase(common, cc)) - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); else { /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */ OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20); - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); } /* Skip the variable-length character. */ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); JUMPHERE(jump[0]); @@ -4890,101 +6519,90 @@ switch(type) #endif /* COMPILE_PCRE8 */ { GETCHARLEN(c, cc, length); - read_char(common); } } else #endif /* SUPPORT_UTF */ - { - read_char(common); c = *cc; - } if (type == OP_NOT || !char_has_othercase(common, cc)) - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); + { + read_char_range(common, c, c, TRUE); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + } else { oc = char_othercase(common, c); + read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE); bit = c ^ oc; if (is_powerof2(bit)) { OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit); - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit)); } else { - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c)); - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc)); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c)); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc)); } } return cc + length; case OP_CLASS: case OP_NCLASS: - detect_partial_match(common, backtracks); - read_char(common); - if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks)) + if (check_str_ptr) + detect_partial_match(common, backtracks); + +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 + bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255; + read_char_range(common, 0, bit, type == OP_NCLASS); +#else + read_char_range(common, 0, 255, type == OP_NCLASS); +#endif + + if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks)) return cc + 32 / sizeof(pcre_uchar); -#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 jump[0] = NULL; -#ifdef COMPILE_PCRE8 - /* This check only affects 8 bit mode. In other modes, we - always need to compare the value with 255. */ if (common->utf) -#endif /* COMPILE_PCRE8 */ { - jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); + jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit); if (type == OP_CLASS) { add_jump(compiler, backtracks, jump[0]); jump[0] = NULL; } } -#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ +#elif !defined COMPILE_PCRE8 + jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255); + if (type == OP_CLASS) + { + add_jump(compiler, backtracks, jump[0]); + jump[0] = NULL; + } +#endif /* SUPPORT_UTF && COMPILE_PCRE8 */ + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); - add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO)); + add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); + #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 if (jump[0] != NULL) JUMPHERE(jump[0]); -#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ +#endif return cc + 32 / sizeof(pcre_uchar); #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 case OP_XCLASS: + if (check_str_ptr) + detect_partial_match(common, backtracks); compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks); return cc + GET(cc, 0) - 1; #endif - - case OP_REVERSE: - length = GET(cc, 0); - if (length == 0) - return cc + LINK_SIZE; - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -#ifdef SUPPORT_UTF - if (common->utf) - { - OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length); - label = LABEL(); - add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0)); - skip_char_back(common); - OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); - JUMPTO(SLJIT_C_NOT_ZERO, label); - } - else -#endif - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); - add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0)); - } - check_start_used_ptr(common); - return cc + LINK_SIZE; } SLJIT_ASSERT_STOP(); return cc; @@ -5042,7 +6660,7 @@ if (context.length > 0) { /* We have a fixed-length byte sequence. */ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length); - add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); context.sourcereg = -1; #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED @@ -5053,29 +6671,7 @@ if (context.length > 0) } /* A non-fixed length character will be checked if length == 0. */ -return compile_char1_matchingpath(common, *cc, cc + 1, backtracks); -} - -static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) -{ -DEFINE_COMPILER; -int offset = GET2(cc, 1) << 1; - -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); -if (!common->jscript_compat) - { - if (backtracks == NULL) - { - /* OVECTOR(1) contains the "string begin - 1" constant. */ - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); - OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); - return JUMP(SLJIT_C_NOT_ZERO); - } - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); - } -return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); +return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE); } /* Forward definitions. */ @@ -5110,39 +6706,80 @@ static void compile_backtrackingpath(compiler_common *, struct backtrack_common #define BACKTRACK_AS(type) ((type *)backtrack) -static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail) +static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) { +/* The OVECTOR offset goes to TMP2. */ DEFINE_COMPILER; -int offset = GET2(cc, 1) << 1; +int count = GET2(cc, 1 + IMM2_SIZE); +pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size; +unsigned int offset; +jump_list *found = NULL; + +SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI); + +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); + +count--; +while (count-- > 0) + { + offset = GET2(slot, 0) << 1; + GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); + add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0)); + slot += common->name_entry_size; + } + +offset = GET2(slot, 0) << 1; +GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); +if (backtracks != NULL && !common->jscript_compat) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0)); + +set_jumps(found, LABEL()); +} + +static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail) +{ +DEFINE_COMPILER; +BOOL ref = (*cc == OP_REF || *cc == OP_REFI); +int offset = 0; struct sljit_jump *jump = NULL; struct sljit_jump *partial; struct sljit_jump *nopartial; -OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); -/* OVECTOR(1) contains the "string begin - 1" constant. */ -if (withchecks && !common->jscript_compat) - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); +if (ref) + { + offset = GET2(cc, 1) << 1; + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + /* OVECTOR(1) contains the "string begin - 1" constant. */ + if (withchecks && !common->jscript_compat) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + } +else + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); #if defined SUPPORT_UTF && defined SUPPORT_UCP if (common->utf && *cc == OP_REFI) { - SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); + SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2); + if (ref) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + if (withchecks) - jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0); + jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0); /* Needed to save important temporary registers. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); + OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0); sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp)); - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); if (common->mode == JIT_COMPILE) - add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1)); else { - add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); - nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); + nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); check_partial(common, FALSE); add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); JUMPHERE(nopartial); @@ -5152,17 +6789,21 @@ if (common->utf && *cc == OP_REFI) else #endif /* SUPPORT_UTF && SUPPORT_UCP */ { - OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0); + if (ref) + OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0); + else + OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0); + if (withchecks) - jump = JUMP(SLJIT_C_ZERO); + jump = JUMP(SLJIT_ZERO); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); - partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0); + partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0); if (common->mode == JIT_COMPILE) add_jump(compiler, backtracks, partial); add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); if (common->mode != JIT_COMPILE) { @@ -5171,10 +6812,10 @@ else /* TMP2 -= STR_END - STR_PTR */ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0); - partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0); + partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); - add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); JUMPHERE(partial); check_partial(common, FALSE); add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); @@ -5189,14 +6830,15 @@ if (jump != NULL) else JUMPHERE(jump); } -return cc + 1 + IMM2_SIZE; } static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) { DEFINE_COMPILER; +BOOL ref = (*cc == OP_REF || *cc == OP_REFI); backtrack_common *backtrack; pcre_uchar type; +int offset = 0; struct sljit_label *label; struct sljit_jump *zerolength; struct sljit_jump *jump = NULL; @@ -5204,9 +6846,15 @@ pcre_uchar *ccbegin = cc; int min = 0, max = 0; BOOL minimize; -PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); +PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL); +if (ref) + offset = GET2(cc, 1) << 1; +else + cc += IMM2_SIZE; type = cc[1 + IMM2_SIZE]; + +SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even); minimize = (type & 0x1) != 0; switch(type) { @@ -5244,37 +6892,64 @@ if (!minimize) if (min == 0) { allocate_stack(common, 2); + if (ref) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); /* Temporary release of STR_PTR. */ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); - zerolength = compile_ref_checks(common, ccbegin, NULL); + /* Handles both invalid and empty cases. Since the minimum repeat, + is zero the invalid case is basically the same as an empty case. */ + if (ref) + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + else + { + compile_dnref_search(common, ccbegin, NULL); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } /* Restore if not zero length. */ OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); } else { allocate_stack(common, 1); + if (ref) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks); + if (ref) + { + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + } + else + { + compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } } if (min > 1 || max > 1) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0); label = LABEL(); + if (!ref) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE); if (min > 1 || max > 1) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); if (min > 1) - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label); + CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label); if (max > 1) { - jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max); + jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max); allocate_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); JUMPTO(SLJIT_JUMP, label); @@ -5291,30 +6966,58 @@ if (!minimize) } JUMPHERE(zerolength); - BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); + BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL(); count_match(common); return cc; } -allocate_stack(common, 2); +allocate_stack(common, ref ? 2 : 3); +if (ref) + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); if (type != OP_CRMINSTAR) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); if (min == 0) { - zerolength = compile_ref_checks(common, ccbegin, NULL); + /* Handles both invalid and empty cases. Since the minimum repeat, + is zero the invalid case is basically the same as an empty case. */ + if (ref) + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + else + { + compile_dnref_search(common, ccbegin, NULL); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } + /* Length is non-zero, we can match real repeats. */ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); jump = JUMP(SLJIT_JUMP); } else - zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks); + { + if (ref) + { + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); + } + else + { + compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); + zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + } + } -BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); +BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL(); if (max > 0) - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); +if (!ref) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); @@ -5323,7 +7026,7 @@ if (min > 1) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath); + CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath); } else if (max > 0) OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1); @@ -5383,15 +7086,15 @@ if (entry == NULL) if (common->has_set_som && common->mark_ptr != 0) { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); allocate_stack(common, 2); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); } else if (common->has_set_som || common->mark_ptr != 0) { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr); allocate_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); } @@ -5401,11 +7104,11 @@ if (entry->entry == NULL) else JUMPTO(SLJIT_FAST_CALL, entry->entry); /* Leave if the match is failed. */ -add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0)); +add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0)); return cc + 1 + LINK_SIZE; } -static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector) +static int SLJIT_CALL do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector) { const pcre_uchar *begin = arguments->begin; int *offset_vector = arguments->offsets; @@ -5465,45 +7168,71 @@ PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw)); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr); -OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); SLJIT_ASSERT(common->capture_last_ptr != 0); -OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]); -OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); +OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]); +OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0); /* These pointer sized fields temporarly stores internal variables. */ -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0); if (common->mark_ptr != 0) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr)); -OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2)); -OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE)); +OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2)); +OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0); /* Needed to save important temporary registers. */ -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); -OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE); -GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); +OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE); +GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START); sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout)); -OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0); -OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); +OP1(SLJIT_MOV_S32, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0); +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw)); /* Check return value. */ OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); -add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER)); +add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER)); if (common->forced_quit_label == NULL) - add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS)); + add_jump(compiler, &common->forced_quit, JUMP(SLJIT_SIG_LESS)); else - JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label); + JUMPTO(SLJIT_SIG_LESS, common->forced_quit_label); return cc + 2 + 2 * LINK_SIZE; } #undef CALLOUT_ARG_SIZE #undef CALLOUT_ARG_OFFSET +static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc) +{ +while (TRUE) + { + switch (*cc) + { + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + case OP_CALLOUT: + case OP_ALT: + cc += PRIV(OP_lengths)[*cc]; + break; + + case OP_KET: + return FALSE; + + default: + return TRUE; + } + } +} + static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional) { DEFINE_COMPILER; @@ -5555,21 +7284,34 @@ if (bra == OP_BRAMINZERO) /* This is a braminzero backtrack path. */ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); } if (framesize < 0) { - extrasize = needs_control_head ? 2 : 1; + extrasize = 1; + if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE)) + extrasize = 0; + + if (needs_control_head) + extrasize++; + if (framesize == no_frame) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0); - allocate_stack(common, extrasize); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); + + if (extrasize > 0) + allocate_stack(common, extrasize); + if (needs_control_head) - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + + if (extrasize > 0) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + if (needs_control_head) { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0); + SLJIT_ASSERT(extrasize == 2); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); } } @@ -5577,20 +7319,23 @@ else { extrasize = needs_control_head ? 3 : 2; allocate_stack(common, framesize + extrasize); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); if (needs_control_head) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + if (needs_control_head) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); } else OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE); } @@ -5614,7 +7359,7 @@ while (1) altbacktrack.top = NULL; altbacktrack.topbacktracks = NULL; - if (*ccbegin == OP_ALT) + if (*ccbegin == OP_ALT && extrasize > 0) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); altbacktrack.cc = ccbegin; @@ -5642,26 +7387,27 @@ while (1) if (framesize < 0) { if (framesize == no_frame) - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - else + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + else if (extrasize > 0) free_stack(common, extrasize); + if (needs_control_head) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0); } else { if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional) { /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); + OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); if (needs_control_head) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0); } else { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); if (needs_control_head) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw)); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); } } @@ -5670,7 +7416,10 @@ while (1) { /* We know that STR_PTR was stored on the top of the stack. */ if (conditional) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0); + { + if (extrasize > 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0); + } else if (bra == OP_BRAZERO) { if (framesize < 0) @@ -5679,7 +7428,7 @@ while (1) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); } OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); @@ -5687,7 +7436,7 @@ while (1) else if (framesize >= 0) { /* For OP_BRA and OP_BRAMINZERO. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); } } add_jump(compiler, found, JUMP(SLJIT_JUMP)); @@ -5731,10 +7480,10 @@ if (common->positive_assert_quit != NULL) set_jumps(common->positive_assert_quit, LABEL()); SLJIT_ASSERT(framesize != no_stack); if (framesize < 0) - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw)); + OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw)); else { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw)); } @@ -5742,12 +7491,12 @@ if (common->positive_assert_quit != NULL) } if (needs_control_head) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1)); if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) { /* Assert is failed. */ - if (conditional || bra == OP_BRAZERO) + if ((conditional && extrasize > 0) || bra == OP_BRAZERO) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); if (framesize < 0) @@ -5759,7 +7508,7 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) free_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); } - else + else if (extrasize > 0) free_stack(common, extrasize); } else @@ -5773,7 +7522,7 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) } else free_stack(common, framesize + extrasize); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); } jump = JUMP(SLJIT_JUMP); if (bra != OP_BRAZERO) @@ -5784,7 +7533,9 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) if (framesize < 0) { /* We know that STR_PTR was stored on the top of the stack. */ - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw)); + if (extrasize > 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw)); + /* Keep the STR_PTR on the top of the stack. */ if (bra == OP_BRAZERO) { @@ -5803,13 +7554,13 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) if (bra == OP_BRA) { /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); + OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw)); } else { /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw)); + OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw)); if (extrasize == 2) { OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); @@ -5835,9 +7586,9 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) JUMPHERE(brajump); if (framesize >= 0) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); } set_jumps(backtrack->common.topbacktracks, LABEL()); } @@ -5847,14 +7598,16 @@ else /* AssertNot is successful. */ if (framesize < 0) { - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + if (extrasize > 0) + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + if (bra != OP_BRA) { if (extrasize == 2) free_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); } - else + else if (extrasize > 0) free_stack(common, extrasize); } else @@ -5869,7 +7622,7 @@ else } else free_stack(common, framesize + extrasize); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); } if (bra == OP_BRAZERO) @@ -5902,116 +7655,6 @@ common->accept = save_accept; return cc + 1 + LINK_SIZE; } -static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table) -{ -int condition = FALSE; -pcre_uchar *slotA = name_table; -pcre_uchar *slotB; -sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)]; -sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)]; -sljit_sw no_capture; -int i; - -locals += refno & 0xff; -refno >>= 8; -no_capture = locals[1]; - -for (i = 0; i < name_count; i++) - { - if (GET2(slotA, 0) == refno) break; - slotA += name_entry_size; - } - -if (i < name_count) - { - /* Found a name for the number - there can be only one; duplicate names - for different numbers are allowed, but not vice versa. First scan down - for duplicates. */ - - slotB = slotA; - while (slotB > name_table) - { - slotB -= name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - condition = locals[GET2(slotB, 0) << 1] != no_capture; - if (condition) break; - } - else break; - } - - /* Scan up for duplicates */ - if (!condition) - { - slotB = slotA; - for (i++; i < name_count; i++) - { - slotB += name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - condition = locals[GET2(slotB, 0) << 1] != no_capture; - if (condition) break; - } - else break; - } - } - } -return condition; -} - -static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table) -{ -int condition = FALSE; -pcre_uchar *slotA = name_table; -pcre_uchar *slotB; -sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)]; -sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)]; -sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)]; -sljit_uw i; - -for (i = 0; i < name_count; i++) - { - if (GET2(slotA, 0) == recno) break; - slotA += name_entry_size; - } - -if (i < name_count) - { - /* Found a name for the number - there can be only one; duplicate - names for different numbers are allowed, but not vice versa. First - scan down for duplicates. */ - - slotB = slotA; - while (slotB > name_table) - { - slotB -= name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - condition = GET2(slotB, 0) == group_num; - if (condition) break; - } - else break; - } - - /* Scan up for duplicates */ - if (!condition) - { - slotB = slotA; - for (i++; i < name_count; i++) - { - slotB += name_entry_size; - if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) - { - condition = GET2(slotB, 0) == group_num; - if (condition) break; - } - else break; - } - } - } -return condition; -} - static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head) { DEFINE_COMPILER; @@ -6020,13 +7663,15 @@ int stacksize; if (framesize < 0) { if (framesize == no_frame) - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); else { stacksize = needs_control_head ? 1 : 0; if (ket != OP_KET || has_alternatives) stacksize++; - free_stack(common, stacksize); + + if (stacksize > 0) + free_stack(common, stacksize); } if (needs_control_head) @@ -6038,13 +7683,13 @@ if (framesize < 0) else if (ket == OP_KETRMIN) { /* Move the STR_PTR to the private_data_ptr. */ - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0); } } else { stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1; - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw)); + OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw)); if (needs_control_head) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0); @@ -6055,7 +7700,7 @@ else } } if (needs_control_head) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0); } static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr) @@ -6064,20 +7709,20 @@ DEFINE_COMPILER; if (common->capture_last_ptr != 0) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); stacksize++; } if (common->optimized_cbracket[offset >> 1] == 0) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); stacksize += 2; } return stacksize; @@ -6144,11 +7789,12 @@ backtrack_common *backtrack; pcre_uchar opcode; int private_data_ptr = 0; int offset = 0; -int stacksize; +int i, stacksize; int repeat_ptr = 0, repeat_length = 0; int repeat_type = 0, repeat_count = 0; pcre_uchar *ccbegin; pcre_uchar *matchingpath; +pcre_uchar *slot; pcre_uchar bra = OP_BRA; pcre_uchar ket; assert_backtrack *assert; @@ -6198,20 +7844,8 @@ SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO cc += GET(cc, 1); has_alternatives = *cc == OP_ALT; -if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) - { - has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE; - if (*matchingpath == OP_NRREF) - { - stacksize = GET2(matchingpath, 1); - if (common->currententry == NULL || stacksize == RREF_ANY) - has_alternatives = FALSE; - else if (common->currententry->start == 0) - has_alternatives = stacksize != 0; - else - has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); - } - } +if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND)) + has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE; if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) opcode = OP_SCOND; @@ -6272,13 +7906,13 @@ if (bra == OP_BRAMINZERO) if (ket != OP_KETRMIN) { free_stack(common, 1); - braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); } else { if (opcode == OP_ONCE || opcode >= OP_SBRA) { - jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); /* Nothing stored during the first run. */ skip = JUMP(SLJIT_JUMP); @@ -6287,19 +7921,19 @@ if (bra == OP_BRAMINZERO) if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) { /* When we come from outside, private_data_ptr contains the previous STR_PTR. */ - braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); } else { /* Except when the whole stack frame must be saved. */ - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw)); } JUMPHERE(skip); } else { - jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); JUMPHERE(jump); } @@ -6308,7 +7942,7 @@ if (bra == OP_BRAMINZERO) if (repeat_type != 0) { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count); if (repeat_type == OP_EXACT) rmax_label = LABEL(); } @@ -6329,7 +7963,7 @@ if (opcode == OP_ONCE) stacksize = 0; if (needs_control_head) { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); stacksize++; } @@ -6340,12 +7974,12 @@ if (opcode == OP_ONCE) { stacksize += 2; if (!needs_control_head) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); } else { if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); if (ket == OP_KETRMAX || has_alternatives) stacksize++; } @@ -6363,10 +7997,10 @@ if (opcode == OP_ONCE) if (ket == OP_KETRMIN) { if (needs_control_head) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame) - OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw)); + OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0); } else if (ket == OP_KETRMAX || has_alternatives) @@ -6383,20 +8017,20 @@ if (opcode == OP_ONCE) if (needs_control_head) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); stacksize = needs_control_head ? 1 : 0; if (ket != OP_KET || has_alternatives) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); stacksize++; OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); } else { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); } init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE); @@ -6409,26 +8043,26 @@ else if (opcode == OP_CBRA || opcode == OP_SCBRA) { SLJIT_ASSERT(private_data_ptr == OVECTOR(offset)); allocate_stack(common, 2); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); } else { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); } } else if (opcode == OP_SBRA || opcode == OP_SCOND) { /* Saving the previous value. */ - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); } else if (has_alternatives) @@ -6445,50 +8079,78 @@ if (opcode == OP_COND || opcode == OP_SCOND) { SLJIT_ASSERT(has_alternatives); add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), - CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); + CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); matchingpath += 1 + IMM2_SIZE; } - else if (*matchingpath == OP_NCREF) + else if (*matchingpath == OP_DNCREF) { SLJIT_ASSERT(has_alternatives); - stacksize = GET2(matchingpath, 1); - jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); - - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size); - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw))); - GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0); - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table); - sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector)); - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); - add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0)); - JUMPHERE(jump); - matchingpath += 1 + IMM2_SIZE; + i = GET2(matchingpath, 1 + IMM2_SIZE); + slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; + OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); + OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0); + slot += common->name_entry_size; + i--; + while (i-- > 0) + { + OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0); + OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0); + slot += common->name_entry_size; + } + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO)); + matchingpath += 1 + 2 * IMM2_SIZE; } - else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF) + else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) { /* Never has other case. */ BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL; + SLJIT_ASSERT(!has_alternatives); - stacksize = GET2(matchingpath, 1); - if (common->currententry == NULL) + if (*matchingpath == OP_FAIL) stacksize = 0; - else if (stacksize == RREF_ANY) - stacksize = 1; - else if (common->currententry->start == 0) - stacksize = stacksize == 0; - else - stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); - - if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL) + if (*matchingpath == OP_RREF) { - SLJIT_ASSERT(!has_alternatives); + stacksize = GET2(matchingpath, 1); + if (common->currententry == NULL) + stacksize = 0; + else if (stacksize == RREF_ANY) + stacksize = 1; + else if (common->currententry->start == 0) + stacksize = stacksize == 0; + else + stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); + if (stacksize != 0) matchingpath += 1 + IMM2_SIZE; + } + else + { + if (common->currententry == NULL || common->currententry->start == 0) + stacksize = 0; else { + stacksize = GET2(matchingpath, 1 + IMM2_SIZE); + slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; + i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); + while (stacksize > 0) + { + if ((int)GET2(slot, 0) == i) + break; + slot += common->name_entry_size; + stacksize--; + } + } + + if (stacksize != 0) + matchingpath += 1 + 2 * IMM2_SIZE; + } + + /* The stacksize == 0 is a common "else" case. */ + if (stacksize == 0) + { if (*cc == OP_ALT) { matchingpath = cc + 1 + LINK_SIZE; @@ -6497,24 +8159,6 @@ if (opcode == OP_COND || opcode == OP_SCOND) else matchingpath = cc; } - } - else - { - SLJIT_ASSERT(has_alternatives); - - stacksize = GET2(matchingpath, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE)); - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize); - GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0); - OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table); - sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups)); - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); - add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0)); - matchingpath += 1 + IMM2_SIZE; - } } else { @@ -6541,7 +8185,7 @@ stacksize = 0; if (repeat_type == OP_MINUPTO) { /* We need to preserve the counter. TMP2 will be used below. */ - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); stacksize++; } if (ket != OP_KET || bra != OP_BRA) @@ -6591,7 +8235,7 @@ if (has_alternatives) if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0) { SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); } if (ket == OP_KETRMAX) @@ -6600,8 +8244,8 @@ if (ket == OP_KETRMAX) { if (has_alternatives) BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1); - JUMPTO(SLJIT_C_NOT_ZERO, rmax_label); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, rmax_label); /* Drop STR_PTR for greedy plus quantifier. */ if (opcode != OP_ONCE) free_stack(common, 1); @@ -6613,14 +8257,14 @@ if (ket == OP_KETRMAX) /* Checking zero-length iteration. */ if (opcode != OP_ONCE) { - CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label); + CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label); /* Drop STR_PTR for greedy plus quantifier. */ if (bra != OP_BRAZERO) free_stack(common, 1); } else /* TMP2 must contain the starting STR_PTR. */ - CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label); + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label); } else JUMPTO(SLJIT_JUMP, rmax_label); @@ -6629,13 +8273,14 @@ if (ket == OP_KETRMAX) if (repeat_type == OP_EXACT) { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1); - JUMPTO(SLJIT_C_NOT_ZERO, rmax_label); + count_match(common); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, rmax_label); } else if (repeat_type == OP_UPTO) { /* We need to preserve the counter. */ - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); allocate_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); } @@ -6655,7 +8300,7 @@ if (bra == OP_BRAMINZERO) framesize is < 0, OP_ONCE will do the release itself. */ if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); } else if (ket == OP_KETRMIN && opcode != OP_ONCE) @@ -6672,9 +8317,13 @@ while (*cc == OP_ALT) cc += GET(cc, 1); cc += 1 + LINK_SIZE; -/* Temporarily encoding the needs_control_head in framesize. */ if (opcode == OP_ONCE) + { + /* We temporarily encode the needs_control_head in the lowest bit. + Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns + the same value for small signed numbers (including negative numbers). */ BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0); + } return cc + repeat_length; } @@ -6750,20 +8399,20 @@ if (framesize < 0) BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; allocate_stack(common, stacksize); if (framesize == no_frame) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); stack = 0; if (offset != 0) { stack = 2; - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); if (common->capture_last_ptr != 0) - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); if (needs_control_head) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); if (common->capture_last_ptr != 0) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); @@ -6773,7 +8422,7 @@ if (framesize < 0) else { if (needs_control_head) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); stack = 1; } @@ -6800,10 +8449,10 @@ else BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; allocate_stack(common, stacksize); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); if (needs_control_head) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); - OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1)); stack = 0; if (!zero) @@ -6827,7 +8476,7 @@ else } if (offset != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); loop = LABEL(); while (*cc != OP_KETRPOS) @@ -6843,16 +8492,16 @@ while (*cc != OP_KETRPOS) if (framesize < 0) { if (framesize == no_frame) - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); if (offset != 0) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); if (common->capture_last_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); } else { @@ -6861,8 +8510,12 @@ while (*cc != OP_KETRPOS) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); } + /* Even if the match is empty, we need to reset the control head. */ + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); + if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) - add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0)); + add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); if (!zero) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); @@ -6871,25 +8524,29 @@ while (*cc != OP_KETRPOS) { if (offset != 0) { - OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0); + OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); if (common->capture_last_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); } else { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); if (opcode == OP_SBRAPOS) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0); } + /* Even if the match is empty, we need to reset the control head. */ + if (needs_control_head) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); + if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) - add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0)); + add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0)); if (!zero) { @@ -6900,9 +8557,6 @@ while (*cc != OP_KETRPOS) } } - if (needs_control_head) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); - JUMPTO(SLJIT_JUMP, loop); flush_stubs(common); @@ -6914,7 +8568,7 @@ while (*cc != OP_KETRPOS) if (framesize < 0) { if (offset != 0) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); else OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); } @@ -6924,12 +8578,12 @@ while (*cc != OP_KETRPOS) { /* Last alternative. */ if (*cc == OP_KETRPOS) - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); } else { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw)); } } @@ -6945,9 +8599,9 @@ backtrack->topbacktracks = NULL; if (!zero) { if (framesize < 0) - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); else /* TMP2 is set to [private_data_ptr] above. */ - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0)); + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0)); } /* None of them matched. */ @@ -6956,11 +8610,13 @@ count_match(common); return cc + 1 + LINK_SIZE; } -static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end) +static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, sljit_u32 *max, sljit_u32 *exact, pcre_uchar **end) { int class_len; *opcode = *cc; +*exact = 0; + if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO) { cc++; @@ -6988,63 +8644,114 @@ else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO) { cc++; *opcode -= OP_TYPESTAR - OP_STAR; - *type = 0; + *type = OP_END; } else { - SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS); + SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS); *type = *opcode; cc++; class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0); *opcode = cc[class_len - 1]; + if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY) { *opcode -= OP_CRSTAR - OP_STAR; - if (end != NULL) - *end = cc + class_len; + *end = cc + class_len; + + if (*opcode == OP_PLUS || *opcode == OP_MINPLUS) + { + *exact = 1; + *opcode -= OP_PLUS - OP_STAR; + } } - else + else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY) { - SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE); - *arg1 = GET2(cc, (class_len + IMM2_SIZE)); - *arg2 = GET2(cc, class_len); + *opcode -= OP_CRPOSSTAR - OP_POSSTAR; + *end = cc + class_len; - if (*arg2 == 0) + if (*opcode == OP_POSPLUS) { - SLJIT_ASSERT(*arg1 != 0); - *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO; + *exact = 1; + *opcode = OP_POSSTAR; } - if (*arg1 == *arg2) - *opcode = OP_EXACT; + } + else + { + SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE); + *max = GET2(cc, (class_len + IMM2_SIZE)); + *exact = GET2(cc, class_len); - if (end != NULL) - *end = cc + class_len + 2 * IMM2_SIZE; + if (*max == 0) + { + if (*opcode == OP_CRPOSRANGE) + *opcode = OP_POSSTAR; + else + *opcode -= OP_CRRANGE - OP_STAR; + } + else + { + *max -= *exact; + if (*max == 0) + *opcode = OP_EXACT; + else if (*max == 1) + { + if (*opcode == OP_CRPOSRANGE) + *opcode = OP_POSQUERY; + else + *opcode -= OP_CRRANGE - OP_QUERY; + } + else + { + if (*opcode == OP_CRPOSRANGE) + *opcode = OP_POSUPTO; + else + *opcode -= OP_CRRANGE - OP_UPTO; + } + } + *end = cc + class_len + 2 * IMM2_SIZE; } return cc; } -if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO) +switch(*opcode) { - *arg1 = GET2(cc, 0); + case OP_EXACT: + *exact = GET2(cc, 0); cc += IMM2_SIZE; + break; + + case OP_PLUS: + case OP_MINPLUS: + *exact = 1; + *opcode -= OP_PLUS - OP_STAR; + break; + + case OP_POSPLUS: + *exact = 1; + *opcode = OP_POSSTAR; + break; + + case OP_UPTO: + case OP_MINUPTO: + case OP_POSUPTO: + *max = GET2(cc, 0); + cc += IMM2_SIZE; + break; } -if (*type == 0) +if (*type == OP_END) { *type = *cc; - if (end != NULL) - *end = next_opcode(common, cc); + *end = next_opcode(common, cc); cc++; return cc; } -if (end != NULL) - { - *end = cc + 1; +*end = cc + 1; #ifdef SUPPORT_UTF - if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc); +if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc); #endif - } return cc; } @@ -7054,95 +8761,110 @@ DEFINE_COMPILER; backtrack_common *backtrack; pcre_uchar opcode; pcre_uchar type; -int arg1 = -1, arg2 = -1; -pcre_uchar* end; -jump_list *nomatch = NULL; +sljit_u32 max = 0, exact; +BOOL fast_fail; +sljit_s32 fast_str_ptr; +BOOL charpos_enabled; +pcre_uchar charpos_char; +unsigned int charpos_othercasebit; +pcre_uchar *end; +jump_list *no_match = NULL; +jump_list *no_char1_match = NULL; struct sljit_jump *jump = NULL; struct sljit_label *label; int private_data_ptr = PRIVATE_DATA(cc); -int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG); +int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); int tmp_base, tmp_offset; -PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); +PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL); -cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end); +fast_str_ptr = PRIVATE_DATA(cc + 1); +fast_fail = TRUE; -switch(type) +SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr); + +if (cc == common->fast_forward_bc_ptr) + fast_fail = FALSE; +else if (common->fast_fail_start_ptr == 0) + fast_str_ptr = 0; + +SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0 + || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr)); + +cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); + +if (type != OP_EXTUNI) { - case OP_NOT_DIGIT: - case OP_DIGIT: - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - case OP_ANY: - case OP_ALLANY: - case OP_ANYBYTE: - case OP_ANYNL: - case OP_NOT_HSPACE: - case OP_HSPACE: - case OP_NOT_VSPACE: - case OP_VSPACE: - case OP_CHAR: - case OP_CHARI: - case OP_NOT: - case OP_NOTI: - case OP_CLASS: - case OP_NCLASS: tmp_base = TMP3; tmp_offset = 0; - break; + } +else + { + tmp_base = SLJIT_MEM1(SLJIT_SP); + tmp_offset = POSSESSIVE0; + } - default: - SLJIT_ASSERT_STOP(); - /* Fall through. */ +if (fast_fail && fast_str_ptr != 0) + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr)); - case OP_EXTUNI: - case OP_XCLASS: - case OP_NOTPROP: - case OP_PROP: - tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG); - tmp_offset = POSSESSIVE0; - break; +/* Handle fixed part first. */ +if (exact > 1) + { + SLJIT_ASSERT(fast_str_ptr == 0); + if (common->mode == JIT_COMPILE +#ifdef SUPPORT_UTF + && !common->utf +#endif + ) + { + OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact)); + add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0)); + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); + OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + else + { + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); + OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } } +else if (exact == 1) + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); switch(opcode) { case OP_STAR: - case OP_PLUS: case OP_UPTO: - case OP_CRRANGE: + SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR); + if (type == OP_ANYNL || type == OP_EXTUNI) { SLJIT_ASSERT(private_data_ptr == 0); - if (opcode == OP_STAR || opcode == OP_UPTO) - { - allocate_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); - } - else - { - allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - } + SLJIT_ASSERT(fast_str_ptr == 0); - if (opcode == OP_UPTO || opcode == OP_CRRANGE) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0); + allocate_stack(common, 2); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); + + if (opcode == OP_UPTO) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); - if (opcode == OP_UPTO || opcode == OP_CRRANGE) + compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE); + if (opcode == OP_UPTO) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - if (opcode == OP_CRRANGE && arg2 > 0) - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label); - if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0)) - jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); + OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + jump = JUMP(SLJIT_ZERO); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); } /* We cannot use TMP3 because of this allocate_stack. */ @@ -7154,106 +8876,268 @@ switch(opcode) } else { - if (opcode == OP_PLUS) - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); - if (private_data_ptr == 0) - allocate_stack(common, 2); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - if (opcode <= OP_PLUS) + charpos_enabled = FALSE; + charpos_char = 0; + charpos_othercasebit = 0; + + if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI)) + { + charpos_enabled = TRUE; +#ifdef SUPPORT_UTF + charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]); +#endif + if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1)) + { + charpos_othercasebit = char_get_othercase_bit(common, end + 1); + if (charpos_othercasebit == 0) + charpos_enabled = FALSE; + } + + if (charpos_enabled) + { + charpos_char = end[1]; + /* Consumpe the OP_CHAR opcode. */ + end += 2; +#if defined COMPILE_PCRE8 + SLJIT_ASSERT((charpos_othercasebit >> 8) == 0); +#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 + SLJIT_ASSERT((charpos_othercasebit >> 9) == 0); + if ((charpos_othercasebit & 0x100) != 0) + charpos_othercasebit = (charpos_othercasebit & 0xff) << 8; +#endif + if (charpos_othercasebit != 0) + charpos_char |= charpos_othercasebit; + + BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE; + BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char; + BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit; + } + } + + if (charpos_enabled) + { + if (opcode == OP_UPTO) + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1); + + /* Search the first instance of charpos_char. */ + jump = JUMP(SLJIT_JUMP); + label = LABEL(); + if (opcode == OP_UPTO) + { + OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO)); + } + compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); + if (fast_str_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); + JUMPHERE(jump); + + detect_partial_match(common, &backtrack->topbacktracks); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (charpos_othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); + + if (private_data_ptr == 0) + allocate_stack(common, 2); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); - else - OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1); - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &nomatch); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - if (opcode <= OP_PLUS) - JUMPTO(SLJIT_JUMP, label); - else if (opcode == OP_CRRANGE && arg1 == 0) + if (opcode == OP_UPTO) + { + OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + } + + /* Search the last instance of charpos_char. */ + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_match, FALSE); + if (fast_str_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); + detect_partial_match(common, &no_match); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (charpos_othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); + if (opcode == OP_STAR) + { + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + } + else + { + jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPHERE(jump); + } + + if (opcode == OP_UPTO) + { + OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + else + JUMPTO(SLJIT_JUMP, label); + + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + } +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 + else if (common->utf) { - OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1); - JUMPTO(SLJIT_JUMP, label); + if (private_data_ptr == 0) + allocate_stack(common, 2); + + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + + if (opcode == OP_UPTO) + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_match, TRUE); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + + if (opcode == OP_UPTO) + { + OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + else + JUMPTO(SLJIT_JUMP, label); + + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + if (fast_str_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); } +#endif else { - OP1(SLJIT_MOV, TMP1, 0, base, offset1); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV, base, offset1, TMP1, 0); - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label); + if (private_data_ptr == 0) + allocate_stack(common, 2); + + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + if (opcode == OP_UPTO) + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + + label = LABEL(); + detect_partial_match(common, &no_match); + compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); + if (opcode == OP_UPTO) + { + OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } + else + JUMPTO(SLJIT_JUMP, label); + + set_jumps(no_char1_match, LABEL()); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + if (fast_str_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); } - set_jumps(nomatch, LABEL()); - if (opcode == OP_CRRANGE) - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1)); - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); } - BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); break; case OP_MINSTAR: - case OP_MINPLUS: - if (opcode == OP_MINPLUS) - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); if (private_data_ptr == 0) allocate_stack(common, 1); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + if (fast_str_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); break; case OP_MINUPTO: - case OP_CRMINRANGE: + SLJIT_ASSERT(fast_str_ptr == 0); if (private_data_ptr == 0) allocate_stack(common, 2); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1); - if (opcode == OP_CRMINRANGE) - add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); - BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); + OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); break; case OP_QUERY: case OP_MINQUERY: + SLJIT_ASSERT(fast_str_ptr == 0); if (private_data_ptr == 0) allocate_stack(common, 1); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); if (opcode == OP_QUERY) - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); - BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); + compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); break; case OP_EXACT: - OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1); - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); - OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - JUMPTO(SLJIT_C_NOT_ZERO, label); break; case OP_POSSTAR: - case OP_POSPLUS: - case OP_POSUPTO: - if (opcode == OP_POSPLUS) - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); - if (opcode == OP_POSUPTO) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1); - OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &nomatch); - OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); - if (opcode != OP_POSUPTO) +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 + if (common->utf) + { + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_match, TRUE); + OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); JUMPTO(SLJIT_JUMP, label); - else + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); + if (fast_str_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); + break; + } +#endif + label = LABEL(); + detect_partial_match(common, &no_match); + compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); + JUMPTO(SLJIT_JUMP, label); + set_jumps(no_char1_match, LABEL()); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_match, LABEL()); + if (fast_str_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); + break; + + case OP_POSUPTO: + SLJIT_ASSERT(fast_str_ptr == 0); +#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 + if (common->utf) { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1); - JUMPTO(SLJIT_C_NOT_ZERO, label); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_match, TRUE); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); + OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + set_jumps(no_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); + break; } - set_jumps(nomatch, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); +#endif + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + label = LABEL(); + detect_partial_match(common, &no_match); + compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); + OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_char1_match, LABEL()); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_match, LABEL()); break; case OP_POSQUERY: + SLJIT_ASSERT(fast_str_ptr == 0); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); - compile_char1_matchingpath(common, type, cc, &nomatch); + compile_char1_matchingpath(common, type, cc, &no_match, TRUE); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); - set_jumps(nomatch, LABEL()); + set_jumps(no_match, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); break; @@ -7279,7 +9163,7 @@ if (*cc == OP_FAIL) return cc + 1; } -if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL) +if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty) { /* No need to check notempty conditions. */ if (common->accept_label == NULL) @@ -7290,22 +9174,22 @@ if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL) } if (common->accept_label == NULL) - add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0))); + add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0))); else - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->accept_label); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); -add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); +OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); +add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); +OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); if (common->accept_label == NULL) - add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); else - CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label); + CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); if (common->accept_label == NULL) - add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0)); + add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0)); else - CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label); + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label); add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); return cc + 1; } @@ -7321,11 +9205,11 @@ if (common->currententry != NULL) return cc + 1 + IMM2_SIZE; if (!optimized_cbracket) - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset)); offset <<= 1; -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); if (!optimized_cbracket) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); return cc + 1 + IMM2_SIZE; } @@ -7352,7 +9236,7 @@ if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG) { OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); } @@ -7377,12 +9261,12 @@ BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, size = BACKTRACK_AS(then_trap_backtrack)->framesize; size = 3 + (size < 0 ? 0 : size); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); allocate_stack(common, size); if (size > 3) - OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw)); + OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw)); else - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0); @@ -7418,6 +9302,16 @@ while (cc < ccend) case OP_SOM: case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY: + case OP_EODN: + case OP_EOD: + case OP_DOLL: + case OP_DOLLM: + case OP_CIRC: + case OP_CIRCM: + case OP_REVERSE: + cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + break; + case OP_NOT_DIGIT: case OP_DIGIT: case OP_NOT_WHITESPACE: @@ -7435,23 +9329,16 @@ while (cc < ccend) case OP_NOT_VSPACE: case OP_VSPACE: case OP_EXTUNI: - case OP_EODN: - case OP_EOD: - case OP_CIRC: - case OP_CIRCM: - case OP_DOLL: - case OP_DOLLM: case OP_NOT: case OP_NOTI: - case OP_REVERSE: - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); break; case OP_SET_SOM: PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); allocate_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); cc++; break; @@ -7461,7 +9348,7 @@ while (cc < ccend) if (common->mode == JIT_COMPILE) cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); break; case OP_STAR: @@ -7534,27 +9421,42 @@ while (cc < ccend) case OP_CLASS: case OP_NCLASS: - if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE) + if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE) cc = compile_iterator_matchingpath(common, cc, parent); else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); break; #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 case OP_XCLASS: - if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE) + if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE) cc = compile_iterator_matchingpath(common, cc, parent); else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); break; #endif case OP_REF: case OP_REFI: - if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE) + if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE) cc = compile_ref_iterator_matchingpath(common, cc, parent); else - cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); + { + compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); + cc += 1 + IMM2_SIZE; + } + break; + + case OP_DNREF: + case OP_DNREFI: + if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE) + cc = compile_ref_iterator_matchingpath(common, cc, parent); + else + { + compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); + cc += 1 + 2 * IMM2_SIZE; + } break; case OP_RECURSE: @@ -7588,8 +9490,7 @@ while (cc < ccend) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0); } BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL(); - if (cc[1] > OP_ASSERTBACK_NOT) - count_match(common); + count_match(common); break; case OP_ONCE: @@ -7624,17 +9525,17 @@ while (cc < ccend) case OP_MARK: PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); SLJIT_ASSERT(common->mark_ptr != 0); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); allocate_stack(common, common->has_skip_arg ? 5 : 1); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); if (common->has_skip_arg) { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, STACK_TOP, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0); @@ -7707,95 +9608,82 @@ DEFINE_COMPILER; pcre_uchar *cc = current->cc; pcre_uchar opcode; pcre_uchar type; -int arg1 = -1, arg2 = -1; +sljit_u32 max = 0, exact; struct sljit_label *label = NULL; struct sljit_jump *jump = NULL; jump_list *jumplist = NULL; +pcre_uchar *end; int private_data_ptr = PRIVATE_DATA(cc); -int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG); +int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); -cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL); +cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); switch(opcode) { case OP_STAR: - case OP_PLUS: case OP_UPTO: - case OP_CRRANGE: if (type == OP_ANYNL || type == OP_EXTUNI) { SLJIT_ASSERT(private_data_ptr == 0); - set_jumps(current->topbacktracks, LABEL()); + set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); } else { - if (opcode == OP_UPTO) - arg2 = 0; - if (opcode <= OP_PLUS) + if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled) { OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - jump = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, base, offset1); + OP1(SLJIT_MOV, TMP2, 0, base, offset1); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + label = LABEL(); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit); + CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath); + skip_char_back(common); + CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label); } else { - OP1(SLJIT_MOV, TMP1, 0, base, offset1); OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1); - OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1); + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1); + skip_char_back(common); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); } - skip_char_back(common); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); - if (opcode == OP_CRRANGE) - set_jumps(current->topbacktracks, LABEL()); JUMPHERE(jump); if (private_data_ptr == 0) free_stack(common, 2); - if (opcode == OP_PLUS) - set_jumps(current->topbacktracks, LABEL()); } break; case OP_MINSTAR: - case OP_MINPLUS: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - compile_char1_matchingpath(common, type, cc, &jumplist); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); set_jumps(jumplist, LABEL()); if (private_data_ptr == 0) free_stack(common, 1); - if (opcode == OP_MINPLUS) - set_jumps(current->topbacktracks, LABEL()); break; case OP_MINUPTO: - case OP_CRMINRANGE: - if (opcode == OP_CRMINRANGE) - { - label = LABEL(); - set_jumps(current->topbacktracks, label); - } + OP1(SLJIT_MOV, TMP1, 0, base, offset1); OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - compile_char1_matchingpath(common, type, cc, &jumplist); + OP2(SLJIT_SUB | SLJIT_SET_E, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO)); - OP1(SLJIT_MOV, TMP1, 0, base, offset1); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP1(SLJIT_MOV, base, offset1, TMP1, 0); - - if (opcode == OP_CRMINRANGE) - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label); - - if (opcode == OP_CRMINRANGE && arg1 == 0) - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); - else - CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->matchingpath); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); set_jumps(jumplist, LABEL()); if (private_data_ptr == 0) @@ -7805,12 +9693,12 @@ switch(opcode) case OP_QUERY: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath); jump = JUMP(SLJIT_JUMP); - set_jumps(current->topbacktracks, LABEL()); + set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); JUMPHERE(jump); if (private_data_ptr == 0) free_stack(common, 1); @@ -7819,9 +9707,9 @@ switch(opcode) case OP_MINQUERY: OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); - jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); - compile_char1_matchingpath(common, type, cc, &jumplist); - JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); + jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + compile_char1_matchingpath(common, type, cc, &jumplist, TRUE); + JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath); set_jumps(jumplist, LABEL()); JUMPHERE(jump); if (private_data_ptr == 0) @@ -7829,10 +9717,6 @@ switch(opcode) break; case OP_EXACT: - case OP_POSPLUS: - set_jumps(current->topbacktracks, LABEL()); - break; - case OP_POSSTAR: case OP_POSQUERY: case OP_POSUPTO: @@ -7842,28 +9726,33 @@ switch(opcode) SLJIT_ASSERT_STOP(); break; } + +set_jumps(current->topbacktracks, LABEL()); } static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; pcre_uchar *cc = current->cc; +BOOL ref = (*cc == OP_REF || *cc == OP_REFI); pcre_uchar type; -type = cc[1 + IMM2_SIZE]; +type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE]; + if ((type & 0x1) == 0) { + /* Maximize case. */ set_jumps(current->topbacktracks, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath); return; } OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); -CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); +CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath); set_jumps(current->topbacktracks, LABEL()); -free_stack(common, 2); +free_stack(common, ref ? 2 : 3); } static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current) @@ -7881,14 +9770,14 @@ if (common->has_set_som && common->mark_ptr != 0) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); free_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0); } else if (common->has_set_som || common->mark_ptr != 0) { OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0); } } @@ -7919,7 +9808,7 @@ if (CURRENT_AS(assert_backtrack)->framesize < 0) if (bra == OP_BRAZERO) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); free_stack(common, 1); } return; @@ -7930,19 +9819,19 @@ if (bra == OP_BRAZERO) if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT) { OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); free_stack(common, 1); return; } free_stack(common, 1); - brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); + brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); } if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->private_data_ptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw)); set_jumps(current->topbacktracks, LABEL()); } @@ -7962,21 +9851,22 @@ if (bra == OP_BRAZERO) static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; -int opcode, stacksize, count; +int opcode, stacksize, alt_count, alt_max; int offset = 0; int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr; int repeat_ptr = 0, repeat_type = 0, repeat_count = 0; pcre_uchar *cc = current->cc; pcre_uchar *ccbegin; pcre_uchar *ccprev; -jump_list *jumplist = NULL; -jump_list *jumplistitem = NULL; pcre_uchar bra = OP_BRA; pcre_uchar ket; assert_backtrack *assert; +sljit_uw *next_update_addr = NULL; BOOL has_alternatives; BOOL needs_control_head = FALSE; struct sljit_jump *brazero = NULL; +struct sljit_jump *alt1 = NULL; +struct sljit_jump *alt2 = NULL; struct sljit_jump *once = NULL; struct sljit_jump *cond = NULL; struct sljit_label *rmin_label = NULL; @@ -8014,6 +9904,8 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC)) opcode = OP_ONCE; +alt_max = has_alternatives ? no_alternatives(ccbegin) : 0; + /* Decoding the needs_control_head in framesize. */ if (opcode == OP_ONCE) { @@ -8027,9 +9919,9 @@ if (ket != OP_KET && repeat_type != 0) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); if (repeat_type == OP_UPTO) - OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1); else - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0); } if (ket == OP_KETRMAX) @@ -8038,7 +9930,7 @@ if (ket == OP_KETRMAX) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0); + brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0); } } else if (ket == OP_KETRMIN) @@ -8049,7 +9941,7 @@ else if (ket == OP_KETRMIN) if (repeat_type != 0) { /* TMP1 was set a few lines above. */ - CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); /* Drop STR_PTR for non-greedy plus quantifier. */ if (opcode != OP_ONCE) free_stack(common, 1); @@ -8058,11 +9950,11 @@ else if (ket == OP_KETRMIN) { /* Checking zero-length iteration. */ if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0) - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); else { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath); } /* Drop STR_PTR for non-greedy plus quantifier. */ if (opcode != OP_ONCE) @@ -8073,17 +9965,17 @@ else if (ket == OP_KETRMIN) } rmin_label = LABEL(); if (repeat_type != 0) - OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1); + OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); } else if (bra == OP_BRAZERO) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); + brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); } else if (repeat_type == OP_EXACT) { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); exact_label = LABEL(); } @@ -8094,19 +9986,19 @@ if (offset != 0) SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); free_stack(common, 3); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0); } else if (common->optimized_cbracket[offset >> 1] == 0) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); free_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); } } @@ -8114,7 +10006,7 @@ if (SLJIT_UNLIKELY(opcode == OP_ONCE)) { if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); } once = JUMP(SLJIT_JUMP); @@ -8127,44 +10019,30 @@ else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list)); - if (SLJIT_UNLIKELY(!jumplistitem)) - return; - jumplist = jumplistitem; - jumplistitem->next = NULL; - jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1); + alt_max = 2; + alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw)); } } -else if (*cc == OP_ALT) +else if (has_alternatives) { - /* Build a jump list. Get the last successfully matched branch index. */ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - count = 1; - do - { - /* Append as the last item. */ - if (jumplist != NULL) - { - jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list)); - jumplistitem = jumplistitem->next; - } - else - { - jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list)); - jumplist = jumplistitem; - } - if (SLJIT_UNLIKELY(!jumplistitem)) + if (alt_max > 4) + { + /* Table jump if alt_max is greater than 4. */ + next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw)); + if (SLJIT_UNLIKELY(next_update_addr == NULL)) return; - - jumplistitem->next = NULL; - jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++); - cc += GET(cc, 1); + sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr); + add_label_addr(common, next_update_addr++); + } + else + { + if (alt_max == 4) + alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); + alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw)); } - while (*cc == OP_ALT); - - cc = ccbegin + GET(ccbegin, 1); } COMPILE_BACKTRACKINGPATH(current->top); @@ -8180,9 +10058,9 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) assert = CURRENT_AS(bracket_backtrack)->u.assert; if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK)) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw)); } cond = JUMP(SLJIT_JUMP); set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL()); @@ -8199,7 +10077,7 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) if (has_alternatives) { - count = 1; + alt_count = sizeof(sljit_uw); do { current->top = NULL; @@ -8215,7 +10093,7 @@ if (has_alternatives) if (opcode != OP_ONCE) { if (private_data_ptr != 0) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); else OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); } @@ -8236,7 +10114,7 @@ if (has_alternatives) if (repeat_type == OP_MINUPTO) { /* We need to preserve the counter. TMP2 will be used below. */ - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); stacksize++; } if (ket != OP_KET || bra != OP_BRA) @@ -8275,22 +10153,37 @@ if (has_alternatives) stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); if (opcode != OP_ONCE) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count); if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0) { /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */ SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); } JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath); if (opcode != OP_ONCE) { - SLJIT_ASSERT(jumplist); - JUMPHERE(jumplist->jump); - jumplist = jumplist->next; + if (alt_max > 4) + add_label_addr(common, next_update_addr++); + else + { + if (alt_count != 2 * sizeof(sljit_uw)) + { + JUMPHERE(alt1); + if (alt_max == 3 && alt_count == sizeof(sljit_uw)) + alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); + } + else + { + JUMPHERE(alt2); + if (alt_max == 4) + alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw)); + } + } + alt_count += sizeof(sljit_uw); } COMPILE_BACKTRACKINGPATH(current->top); @@ -8299,7 +10192,6 @@ if (has_alternatives) SLJIT_ASSERT(!current->nextbacktracks); } while (*cc == OP_ALT); - SLJIT_ASSERT(!jumplist); if (cond != NULL) { @@ -8307,9 +10199,9 @@ if (has_alternatives) assert = CURRENT_AS(bracket_backtrack)->u.assert; if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0) { - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw)); } JUMPHERE(cond); } @@ -8327,19 +10219,19 @@ if (offset != 0) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); free_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); } else { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); } } else if (opcode == OP_SBRA || opcode == OP_SCOND) { - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); } else if (opcode == OP_ONCE) @@ -8357,26 +10249,28 @@ else if (opcode == OP_ONCE) /* The STR_PTR must be released. */ stacksize++; } - free_stack(common, stacksize); + + if (stacksize > 0) + free_stack(common, stacksize); JUMPHERE(once); /* Restore previous private_data_ptr */ if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw)); else if (ket == OP_KETRMIN) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); /* See the comment below. */ free_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); } } if (repeat_type == OP_EXACT) { - OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, TMP1, 0); - CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label); + OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0); + CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label); } else if (ket == OP_KETRMAX) { @@ -8384,7 +10278,7 @@ else if (ket == OP_KETRMAX) if (bra != OP_BRAZERO) free_stack(common, 1); - CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); + CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); if (bra == OP_BRAZERO) { OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); @@ -8402,7 +10296,7 @@ else if (ket == OP_KETRMIN) affect badly the free_stack(2) above. */ if (opcode != OP_ONCE) free_stack(common, 1); - CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label); if (opcode == OP_ONCE) free_stack(common, bra == OP_BRAMINZERO ? 2 : 1); else if (bra == OP_BRAMINZERO) @@ -8429,19 +10323,19 @@ if (CURRENT_AS(bracketpos_backtrack)->framesize < 0) offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1; OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); if (common->capture_last_ptr != 0) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); if (common->capture_last_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0); } set_jumps(current->topbacktracks, LABEL()); free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); return; } -OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr); +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); if (current->topbacktracks) @@ -8452,7 +10346,7 @@ if (current->topbacktracks) free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); JUMPHERE(jump); } -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw)); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw)); } static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current) @@ -8492,7 +10386,7 @@ if (opcode == OP_THEN || opcode == OP_THEN_ARG) { SLJIT_ASSERT(common->control_head_ptr != 0); - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start); jump = JUMP(SLJIT_JUMP); @@ -8500,8 +10394,8 @@ if (opcode == OP_THEN || opcode == OP_THEN_ARG) loop = LABEL(); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw)); JUMPHERE(jump); - CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop); - CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop); + CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop); + CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop); add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP)); return; } @@ -8524,14 +10418,14 @@ if (common->local_exit) if (opcode == OP_SKIP_ARG) { SLJIT_ASSERT(common->control_head_ptr != 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2)); sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark)); - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); - add_jump(compiler, &common->reset_match, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1)); + add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1)); return; } @@ -8569,7 +10463,7 @@ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 3); JUMPHERE(jump); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0); } static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current) @@ -8586,7 +10480,7 @@ while (current) case OP_SET_SOM: OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0); break; case OP_STAR: @@ -8664,6 +10558,8 @@ while (current) case OP_REF: case OP_REFI: + case OP_DNREF: + case OP_DNREFI: compile_ref_iterator_backtrackingpath(common, current); break; @@ -8713,9 +10609,9 @@ while (current) if (common->has_skip_arg) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, common->has_skip_arg ? 5 : 1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0); if (common->has_skip_arg) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0); break; case OP_THEN: @@ -8762,7 +10658,7 @@ static SLJIT_INLINE void compile_recurse(compiler_common *common) DEFINE_COMPILER; pcre_uchar *cc = common->start + common->currententry->start; pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE); -pcre_uchar *ccend = bracketend(cc); +pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE); BOOL needs_control_head; int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head); int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head); @@ -8785,12 +10681,13 @@ common->currententry->entry = LABEL(); set_jumps(common->currententry->calls, common->currententry->entry); sljit_emit_fast_enter(compiler, TMP2, 0); +count_match(common); allocate_stack(common, private_data_size + framesize + alternativesize); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0); copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head); if (needs_control_head) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, STACK_TOP, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0); if (needs_frame) init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE); @@ -8837,7 +10734,7 @@ jump = JUMP(SLJIT_JUMP); if (common->quit != NULL) { set_jumps(common->quit, LABEL()); - OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr); + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); if (needs_frame) { OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw)); @@ -8850,7 +10747,7 @@ if (common->quit != NULL) } set_jumps(common->accept, LABEL()); -OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr); +OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); if (needs_frame) { OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw)); @@ -8868,15 +10765,15 @@ if (needs_control_head) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0); OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0); } else { OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw)); OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head_ptr, TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0); } sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0); } @@ -8891,23 +10788,25 @@ struct sljit_compiler *compiler; backtrack_common rootbacktrack; compiler_common common_data; compiler_common *common = &common_data; -const pcre_uint8 *tables = re->tables; +const sljit_u8 *tables = re->tables; pcre_study_data *study; int private_data_size; pcre_uchar *ccend; executable_functions *functions; void *executable_func; sljit_uw executable_size; +sljit_uw total_length; +label_addr_list *label_addr; struct sljit_label *mainloop_label = NULL; struct sljit_label *continue_match_label; -struct sljit_label *empty_match_found_label; -struct sljit_label *empty_match_backtrack_label; +struct sljit_label *empty_match_found_label = NULL; +struct sljit_label *empty_match_backtrack_label = NULL; struct sljit_label *reset_match_label; +struct sljit_label *quit_label; struct sljit_jump *jump; struct sljit_jump *minlength_check_failed = NULL; struct sljit_jump *reqbyte_notfound = NULL; -struct sljit_jump *empty_match; -struct sljit_label *quit_label; +struct sljit_jump *empty_match = NULL; SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0); study = extra->study_data; @@ -8920,9 +10819,11 @@ memset(common, 0, sizeof(compiler_common)); rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size; common->start = rootbacktrack.cc; +common->read_only_data_head = NULL; common->fcc = tables + fcc_offset; common->lcc = (sljit_sw)(tables + lcc_offset); common->mode = mode; +common->might_be_empty = study->minlength == 0; common->nltype = NLTYPE_FIXED; switch(re->options & PCRE_NEWLINE_BITS) { @@ -8943,6 +10844,8 @@ switch(re->options & PCRE_NEWLINE_BITS) case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break; default: return; } +common->nlmax = READ_CHAR_MAX; +common->nlmin = 0; if ((re->options & PCRE_BSR_ANYCRLF) != 0) common->bsr_nltype = NLTYPE_ANYCRLF; else if ((re->options & PCRE_BSR_UNICODE) != 0) @@ -8955,10 +10858,11 @@ else common->bsr_nltype = NLTYPE_ANY; #endif } +common->bsr_nlmax = READ_CHAR_MAX; +common->bsr_nlmin = 0; common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; common->ctypes = (sljit_sw)(tables + ctypes_offset); -common->digits[0] = -2; -common->name_table = (sljit_sw)((pcre_uchar *)re + re->name_table_offset); +common->name_table = ((pcre_uchar *)re) + re->name_table_offset; common->name_count = re->name_count; common->name_entry_size = re->name_entry_size; common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; @@ -8968,12 +10872,35 @@ common->utf = (re->options & PCRE_UTF8) != 0; #ifdef SUPPORT_UCP common->use_ucp = (re->options & PCRE_UCP) != 0; #endif +if (common->utf) + { + if (common->nltype == NLTYPE_ANY) + common->nlmax = 0x2029; + else if (common->nltype == NLTYPE_ANYCRLF) + common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL; + else + { + /* We only care about the first newline character. */ + common->nlmax = common->newline & 0xff; + } + + if (common->nltype == NLTYPE_FIXED) + common->nlmin = common->newline & 0xff; + else + common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL; + + if (common->bsr_nltype == NLTYPE_ANY) + common->bsr_nlmax = 0x2029; + else + common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL; + common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL; + } #endif /* SUPPORT_UTF */ -ccend = bracketend(rootbacktrack.cc); +ccend = bracketend(common->start); /* Calculate the local space size on the stack. */ common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw); -common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1); +common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data); if (!common->optimized_cbracket) return; #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1 @@ -8982,14 +10909,14 @@ memset(common->optimized_cbracket, 0, re->top_bracket + 1); memset(common->optimized_cbracket, 1, re->top_bracket + 1); #endif -SLJIT_ASSERT(*rootbacktrack.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET); +SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET); #if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2 common->capture_last_ptr = common->ovector_start; common->ovector_start += sizeof(sljit_sw); #endif -if (!check_opcode_types(common, rootbacktrack.cc, ccend)) +if (!check_opcode_types(common, common->start, ccend)) { - SLJIT_FREE(common->optimized_cbracket); + SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); return; } @@ -9008,15 +10935,10 @@ if (mode != JIT_COMPILE) common->hit_start = common->ovector_start; common->ovector_start += 2 * sizeof(sljit_sw); } - else - { - SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE); - common->needs_start_ptr = TRUE; - } } if ((re->options & PCRE_FIRSTLINE) != 0) { - common->first_line_end = common->ovector_start; + common->match_end_ptr = common->ovector_start; common->ovector_start += sizeof(sljit_sw); } #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD @@ -9027,14 +10949,12 @@ if (common->control_head_ptr != 0) common->control_head_ptr = common->ovector_start; common->ovector_start += sizeof(sljit_sw); } -if (common->needs_start_ptr && common->has_set_som) +if (common->has_set_som) { /* Saving the real start pointer is necessary. */ common->start_ptr = common->ovector_start; common->ovector_start += sizeof(sljit_sw); } -else - common->needs_start_ptr = FALSE; /* Aligning ovector to even number of sljit words. */ if ((common->ovector_start & sizeof(sljit_sw)) != 0) @@ -9050,88 +10970,93 @@ if (common->capture_last_ptr != 0) SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); -common->private_data_ptrs = (int *)SLJIT_MALLOC((ccend - rootbacktrack.cc) * sizeof(sljit_si)); +total_length = ccend - common->start; +common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), compiler->allocator_data); if (!common->private_data_ptrs) { - SLJIT_FREE(common->optimized_cbracket); + SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); return; } -memset(common->private_data_ptrs, 0, (ccend - rootbacktrack.cc) * sizeof(int)); +memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32)); private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); set_private_data_ptrs(common, &private_data_size, ccend); +if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) + { + if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back) + detect_fast_fail(common, common->start, &private_data_size, 4); + } + +SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr); + if (private_data_size > SLJIT_MAX_LOCAL_SIZE) { - SLJIT_FREE(common->private_data_ptrs); - SLJIT_FREE(common->optimized_cbracket); + SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); + SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); return; } if (common->has_then) { - common->then_offsets = (pcre_uint8 *)SLJIT_MALLOC(ccend - rootbacktrack.cc); - if (!common->then_offsets) - { - SLJIT_FREE(common->optimized_cbracket); - SLJIT_FREE(common->private_data_ptrs); - return; - } - memset(common->then_offsets, 0, ccend - rootbacktrack.cc); - set_then_offsets(common, rootbacktrack.cc, NULL); + common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length); + memset(common->then_offsets, 0, total_length); + set_then_offsets(common, common->start, NULL); } -compiler = sljit_create_compiler(); +compiler = sljit_create_compiler(NULL); if (!compiler) { - SLJIT_FREE(common->optimized_cbracket); - SLJIT_FREE(common->private_data_ptrs); - if (common->has_then) - SLJIT_FREE(common->then_offsets); + SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); + SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); return; } common->compiler = compiler; /* Main pcre_jit_exec entry. */ -sljit_emit_enter(compiler, 1, 5, 5, private_data_size); +sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size); /* Register init. */ reset_ovector(common, (re->top_bracket + 1) * 2); if (common->req_char_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, SLJIT_SCRATCH_REG1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0); -OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_SAVED_REG1, 0); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0); +OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); -OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match)); +OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base)); OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit)); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH, TMP1, 0); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0); + +if (common->fast_fail_start_ptr < common->fast_fail_end_ptr) + reset_fast_fail(common); if (mode == JIT_PARTIAL_SOFT_COMPILE) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); if (common->mark_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0); if (common->control_head_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); /* Main part of the matching */ if ((re->options & PCRE_ANCHORED) == 0) { - mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0); + mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0); continue_match_label = LABEL(); /* Forward search if possible. */ if ((re->options & PCRE_NO_START_OPTIMIZE) == 0) { - if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0)) - { /* Do nothing */ } + if (mode == JIT_COMPILE && fast_forward_first_n_chars(common)) + ; else if ((re->flags & PCRE_FIRSTSET) != 0) - fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0); + fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0); else if ((re->flags & PCRE_STARTLINE) != 0) - fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0); - else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) - fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); + fast_forward_newline(common); + else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) + fast_forward_start_bits(common, study->start_bits); } } else @@ -9141,50 +11066,49 @@ if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_ { OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength)); - minlength_check_failed = CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0); + minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0); } if (common->req_char_ptr != 0) reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0); /* Store the current STR_PTR in OVECTOR(0). */ -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); /* Copy the limit of allowed recursions. */ -OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LIMIT_MATCH); +OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH); if (common->capture_last_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, -1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1); +if (common->fast_forward_bc_ptr != NULL) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0); -if (common->needs_start_ptr) - { - SLJIT_ASSERT(common->start_ptr != OVECTOR(0)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr, STR_PTR, 0); - } -else - SLJIT_ASSERT(common->start_ptr == OVECTOR(0)); +if (common->start_ptr != OVECTOR(0)) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0); /* Copy the beginning of the string. */ if (mode == JIT_PARTIAL_SOFT_COMPILE) { - jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start + sizeof(sljit_sw), STR_PTR, 0); + jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0); JUMPHERE(jump); } else if (mode == JIT_PARTIAL_HARD_COMPILE) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); -compile_matchingpath(common, rootbacktrack.cc, ccend, &rootbacktrack); +compile_matchingpath(common, common->start, ccend, &rootbacktrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { sljit_free_compiler(compiler); - SLJIT_FREE(common->optimized_cbracket); - SLJIT_FREE(common->private_data_ptrs); - if (common->has_then) - SLJIT_FREE(common->then_offsets); + SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); + SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); + free_read_only_data(common->read_only_data_head, compiler->allocator_data); return; } -empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)); -empty_match_found_label = LABEL(); +if (common->might_be_empty) + { + empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); + empty_match_found_label = LABEL(); + } common->accept_label = LABEL(); if (common->accept != NULL) @@ -9208,15 +11132,15 @@ if (mode != JIT_COMPILE) return_with_partial_match(common, common->quit_label); } -empty_match_backtrack_label = LABEL(); +if (common->might_be_empty) + empty_match_backtrack_label = LABEL(); compile_backtrackingpath(common, rootbacktrack.top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { sljit_free_compiler(compiler); - SLJIT_FREE(common->optimized_cbracket); - SLJIT_FREE(common->private_data_ptrs); - if (common->has_then) - SLJIT_FREE(common->then_offsets); + SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); + SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); + free_read_only_data(common->read_only_data_head, compiler->allocator_data); return; } @@ -9226,28 +11150,33 @@ reset_match_label = LABEL(); if (mode == JIT_PARTIAL_SOFT_COMPILE) { /* Update hit_start only in the first time. */ - jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, TMP1, 0); + jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0); JUMPHERE(jump); } /* Check we have remaining characters. */ if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0) { - SLJIT_ASSERT(common->first_line_end != 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end); + SLJIT_ASSERT(common->match_end_ptr != 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); } -OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr); +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), + (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr); if ((re->options & PCRE_ANCHORED) == 0) { - if ((re->options & PCRE_FIRSTLINE) == 0) - CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label); + if (common->ff_newline_shortcut != NULL) + { + if ((re->options & PCRE_FIRSTLINE) == 0) + CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut); + /* There cannot be more newlines here. */ + } else - CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label); + CMPTO(SLJIT_LESS, STR_PTR, 0, ((re->options & PCRE_FIRSTLINE) == 0) ? STR_END : TMP1, 0, mainloop_label); } /* No more remaining characters. */ @@ -9255,23 +11184,29 @@ if (reqbyte_notfound != NULL) JUMPHERE(reqbyte_notfound); if (mode == JIT_PARTIAL_SOFT_COMPILE) - CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel); + CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel); OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); JUMPTO(SLJIT_JUMP, common->quit_label); flush_stubs(common); -JUMPHERE(empty_match); -OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); -CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); -CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); -CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label); -JUMPTO(SLJIT_JUMP, empty_match_backtrack_label); +if (common->might_be_empty) + { + JUMPHERE(empty_match); + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label); + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); + CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label); + JUMPTO(SLJIT_JUMP, empty_match_backtrack_label); + } +common->fast_forward_bc_ptr = NULL; +common->fast_fail_start_ptr = 0; +common->fast_fail_end_ptr = 0; common->currententry = common->entries; common->local_exit = TRUE; quit_label = common->quit_label; @@ -9282,10 +11217,9 @@ while (common->currententry != NULL) if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { sljit_free_compiler(compiler); - SLJIT_FREE(common->optimized_cbracket); - SLJIT_FREE(common->private_data_ptrs); - if (common->has_then) - SLJIT_FREE(common->then_offsets); + SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); + SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); + free_read_only_data(common->read_only_data_head, compiler->allocator_data); return; } flush_stubs(common); @@ -9298,21 +11232,21 @@ common->quit_label = quit_label; /* This is a (really) rare case. */ set_jumps(common->stackalloc, LABEL()); /* RETURN_ADDR is not a saved register. */ -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0); +sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0); OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE); sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); -jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +jump = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top)); OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit)); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); -sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); +sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); /* Allocation failed. */ JUMPHERE(jump); @@ -9364,19 +11298,22 @@ if (common->reset_match != NULL) { set_jumps(common->reset_match, LABEL()); do_reset_match(common, (re->top_bracket + 1) * 2); - CMPTO(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label); + CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label); OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); JUMPTO(SLJIT_JUMP, reset_match_label); } #ifdef SUPPORT_UTF -#ifndef COMPILE_PCRE32 +#ifdef COMPILE_PCRE8 if (common->utfreadchar != NULL) { set_jumps(common->utfreadchar, LABEL()); do_utfreadchar(common); } -#endif /* !COMPILE_PCRE32 */ -#ifdef COMPILE_PCRE8 +if (common->utfreadchar16 != NULL) + { + set_jumps(common->utfreadchar16, LABEL()); + do_utfreadchar16(common); + } if (common->utfreadtype8 != NULL) { set_jumps(common->utfreadtype8, LABEL()); @@ -9392,16 +11329,23 @@ if (common->getucd != NULL) } #endif -SLJIT_FREE(common->optimized_cbracket); -SLJIT_FREE(common->private_data_ptrs); -if (common->has_then) - SLJIT_FREE(common->then_offsets); +SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); +SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); executable_func = sljit_generate_code(compiler); executable_size = sljit_get_generated_code_size(compiler); +label_addr = common->label_addrs; +while (label_addr != NULL) + { + *label_addr->update_addr = sljit_get_label_addr(label_addr->label); + label_addr = label_addr->next; + } sljit_free_compiler(compiler); if (executable_func == NULL) + { + free_read_only_data(common->read_only_data_head, compiler->allocator_data); return; + } /* Reuse the function descriptor if possible. */ if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL) @@ -9417,12 +11361,13 @@ else * bit remains set, as the bit indicates that the pointer to the data * is valid.) */ - functions = SLJIT_MALLOC(sizeof(executable_functions)); + functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data); if (functions == NULL) { /* This case is highly unlikely since we just recently - freed a lot of memory. Although not impossible. */ + freed a lot of memory. Not impossible though. */ sljit_free_code(executable_func); + free_read_only_data(common->read_only_data_head, compiler->allocator_data); return; } memset(functions, 0, sizeof(executable_functions)); @@ -9433,16 +11378,17 @@ else } functions->executable_funcs[mode] = executable_func; +functions->read_only_data_heads[mode] = common->read_only_data_head; functions->executable_sizes[mode] = executable_size; } -static int jit_machine_stack_exec(jit_arguments *arguments, void* executable_func) +static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func) { union { - void* executable_func; + void *executable_func; jit_function call_executable_func; } convert_executable_func; -pcre_uint8 local_space[MACHINE_STACK_SIZE]; +sljit_u8 local_space[MACHINE_STACK_SIZE]; struct sljit_stack local_stack; local_stack.top = (sljit_sw)&local_space; @@ -9460,7 +11406,7 @@ PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject, { executable_functions *functions = (executable_functions *)extra_data->executable_jit; union { - void* executable_func; + void *executable_func; jit_function call_executable_func; } convert_executable_func; jit_arguments arguments; @@ -9482,7 +11428,7 @@ arguments.begin = subject; arguments.end = subject + length; arguments.mark_ptr = NULL; /* JIT decreases this value less frequently than the interpreter. */ -arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit); +arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit); if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match) arguments.limit_match = functions->limit_match; arguments.notbol = (options & PCRE_NOTBOL) != 0; @@ -9554,7 +11500,7 @@ pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data, pcre_uchar *subject_ptr = (pcre_uchar *)subject; executable_functions *functions = (executable_functions *)extra_data->executable_jit; union { - void* executable_func; + void *executable_func; jit_function call_executable_func; } convert_executable_func; jit_arguments arguments; @@ -9582,7 +11528,7 @@ arguments.begin = subject_ptr; arguments.end = subject_ptr + length; arguments.mark_ptr = NULL; /* JIT decreases this value less frequently than the interpreter. */ -arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit); +arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit); if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match) arguments.limit_match = functions->limit_match; arguments.notbol = (options & PCRE_NOTBOL) != 0; @@ -9626,8 +11572,9 @@ for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) { if (functions->executable_funcs[i] != NULL) sljit_free_code(functions->executable_funcs[i]); + free_read_only_data(functions->read_only_data_heads[i], NULL); } -SLJIT_FREE(functions); +SLJIT_FREE(functions, compiler->allocator_data); } int @@ -9669,7 +11616,7 @@ if (startsize > maxsize) startsize = maxsize; startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); -return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize); +return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL); } #if defined COMPILE_PCRE8 @@ -9688,7 +11635,7 @@ PCRE_EXP_DECL void pcre32_jit_stack_free(pcre32_jit_stack *stack) #endif { -sljit_free_stack((struct sljit_stack *)stack); +sljit_free_stack((struct sljit_stack *)stack, NULL); } #if defined COMPILE_PCRE8 @@ -9718,6 +11665,20 @@ if (extra != NULL && } } +#if defined COMPILE_PCRE8 +PCRE_EXP_DECL void +pcre_jit_free_unused_memory(void) +#elif defined COMPILE_PCRE16 +PCRE_EXP_DECL void +pcre16_jit_free_unused_memory(void) +#elif defined COMPILE_PCRE32 +PCRE_EXP_DECL void +pcre32_jit_free_unused_memory(void) +#endif +{ +sljit_free_unused_memory_exec(); +} + #else /* SUPPORT_JIT */ /* These are dummy functions to avoid linking errors when JIT support is not @@ -9784,6 +11745,19 @@ pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void (void)userdata; } +#if defined COMPILE_PCRE8 +PCRE_EXP_DECL void +pcre_jit_free_unused_memory(void) +#elif defined COMPILE_PCRE16 +PCRE_EXP_DECL void +pcre16_jit_free_unused_memory(void) +#elif defined COMPILE_PCRE32 +PCRE_EXP_DECL void +pcre32_jit_free_unused_memory(void) +#endif +{ +} + #endif /* End of pcre_jit_compile.c */ |