1/************************************************* 2* Perl-Compatible Regular Expressions * 3*************************************************/ 4 5/* PCRE is a library of functions to support regular expressions whose syntax 6and semantics are as close as possible to those of the Perl 5 language. 7 8 Written by Philip Hazel 9 Copyright (c) 1997-2013 University of Cambridge 10 11 The machine code generator part (this module) was written by Zoltan Herczeg 12 Copyright (c) 2010-2013 13 14----------------------------------------------------------------------------- 15Redistribution and use in source and binary forms, with or without 16modification, are permitted provided that the following conditions are met: 17 18 * Redistributions of source code must retain the above copyright notice, 19 this list of conditions and the following disclaimer. 20 21 * Redistributions in binary form must reproduce the above copyright 22 notice, this list of conditions and the following disclaimer in the 23 documentation and/or other materials provided with the distribution. 24 25 * Neither the name of the University of Cambridge nor the names of its 26 contributors may be used to endorse or promote products derived from 27 this software without specific prior written permission. 28 29THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 30AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 33LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 34CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 35SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 36INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 37CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39POSSIBILITY OF SUCH DAMAGE. 40----------------------------------------------------------------------------- 41*/ 42 43#ifdef HAVE_CONFIG_H 44#include "config.h" 45#endif 46 47#include "pcre_internal.h" 48 49#if defined SUPPORT_JIT 50 51/* All-in-one: Since we use the JIT compiler only from here, 52we just include it. This way we don't need to touch the build 53system files. */ 54 55#define SLJIT_MALLOC(size) (PUBL(malloc))(size) 56#define SLJIT_FREE(ptr) (PUBL(free))(ptr) 57#define SLJIT_CONFIG_AUTO 1 58#define SLJIT_CONFIG_STATIC 1 59#define SLJIT_VERBOSE 0 60#define SLJIT_DEBUG 0 61 62#include "sljit/sljitLir.c" 63 64#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED 65#error Unsupported architecture 66#endif 67 68/* Defines for debugging purposes. */ 69 70/* 1 - Use unoptimized capturing brackets. 71 2 - Enable capture_last_ptr (includes option 1). */ 72/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */ 73 74/* 1 - Always have a control head. */ 75/* #define DEBUG_FORCE_CONTROL_HEAD 1 */ 76 77/* Allocate memory for the regex stack on the real machine stack. 78Fast, but limited size. */ 79#define MACHINE_STACK_SIZE 32768 80 81/* Growth rate for stack allocated by the OS. Should be the multiply 82of page size. */ 83#define STACK_GROWTH_RATE 8192 84 85/* Enable to check that the allocation could destroy temporaries. 
*/ 86#if defined SLJIT_DEBUG && SLJIT_DEBUG 87#define DESTROY_REGISTERS 1 88#endif 89 90/* 91Short summary about the backtracking mechanism empolyed by the jit code generator: 92 93The code generator follows the recursive nature of the PERL compatible regular 94expressions. The basic blocks of regular expressions are condition checkers 95whose execute different commands depending on the result of the condition check. 96The relationship between the operators can be horizontal (concatenation) and 97vertical (sub-expression) (See struct backtrack_common for more details). 98 99 'ab' - 'a' and 'b' regexps are concatenated 100 'a+' - 'a' is the sub-expression of the '+' operator 101 102The condition checkers are boolean (true/false) checkers. Machine code is generated 103for the checker itself and for the actions depending on the result of the checker. 104The 'true' case is called as the matching path (expected path), and the other is called as 105the 'backtrack' path. Branch instructions are expesive for all CPUs, so we avoid taken 106branches on the matching path. 107 108 Greedy star operator (*) : 109 Matching path: match happens. 110 Backtrack path: match failed. 111 Non-greedy star operator (*?) : 112 Matching path: no need to perform a match. 113 Backtrack path: match is required. 114 115The following example shows how the code generated for a capturing bracket 116with two alternatives. 
Let A, B, C, D are arbirary regular expressions, and 117we have the following regular expression: 118 119 A(B|C)D 120 121The generated code will be the following: 122 123 A matching path 124 '(' matching path (pushing arguments to the stack) 125 B matching path 126 ')' matching path (pushing arguments to the stack) 127 D matching path 128 return with successful match 129 130 D backtrack path 131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C") 132 B backtrack path 133 C expected path 134 jump to D matching path 135 C backtrack path 136 A backtrack path 137 138 Notice, that the order of backtrack code paths are the opposite of the fast 139 code paths. In this way the topmost value on the stack is always belong 140 to the current backtrack code path. The backtrack path must check 141 whether there is a next alternative. If so, it needs to jump back to 142 the matching path eventually. Otherwise it needs to clear out its own stack 143 frame and continue the execution on the backtrack code paths. 144*/ 145 146/* 147Saved stack frames: 148 149Atomic blocks and asserts require reloading the values of private data 150when the backtrack mechanism performed. Because of OP_RECURSE, the data 151are not necessarly known in compile time, thus we need a dynamic restore 152mechanism. 153 154The stack frames are stored in a chain list, and have the following format: 155([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ] 156 157Thus we can restore the private data to a particular point in the stack. 158*/ 159 160typedef struct jit_arguments { 161 /* Pointers first. */ 162 struct sljit_stack *stack; 163 const pcre_uchar *str; 164 const pcre_uchar *begin; 165 const pcre_uchar *end; 166 int *offsets; 167 pcre_uchar *uchar_ptr; 168 pcre_uchar *mark_ptr; 169 void *callout_data; 170 /* Everything else after. 
*/ 171 pcre_uint32 limit_match; 172 int real_offset_count; 173 int offset_count; 174 pcre_uint8 notbol; 175 pcre_uint8 noteol; 176 pcre_uint8 notempty; 177 pcre_uint8 notempty_atstart; 178} jit_arguments; 179 180typedef struct executable_functions { 181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES]; 182 sljit_uw *read_only_data[JIT_NUMBER_OF_COMPILE_MODES]; 183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; 184 PUBL(jit_callback) callback; 185 void *userdata; 186 pcre_uint32 top_bracket; 187 pcre_uint32 limit_match; 188} executable_functions; 189 190typedef struct jump_list { 191 struct sljit_jump *jump; 192 struct jump_list *next; 193} jump_list; 194 195typedef struct stub_list { 196 struct sljit_jump *start; 197 struct sljit_label *quit; 198 struct stub_list *next; 199} stub_list; 200 201typedef struct label_addr_list { 202 struct sljit_label *label; 203 sljit_uw *update_addr; 204 struct label_addr_list *next; 205} label_addr_list; 206 207enum frame_types { 208 no_frame = -1, 209 no_stack = -2 210}; 211 212enum control_types { 213 type_mark = 0, 214 type_then_trap = 1 215}; 216 217typedef int (SLJIT_CALL *jit_function)(jit_arguments *args); 218 219/* The following structure is the key data type for the recursive 220code generator. It is allocated by compile_matchingpath, and contains 221the arguments for compile_backtrackingpath. Must be the first member 222of its descendants. */ 223typedef struct backtrack_common { 224 /* Concatenation stack. */ 225 struct backtrack_common *prev; 226 jump_list *nextbacktracks; 227 /* Internal stack (for component operators). */ 228 struct backtrack_common *top; 229 jump_list *topbacktracks; 230 /* Opcode pointer. */ 231 pcre_uchar *cc; 232} backtrack_common; 233 234typedef struct assert_backtrack { 235 backtrack_common common; 236 jump_list *condfailed; 237 /* Less than 0 if a frame is not needed. */ 238 int framesize; 239 /* Points to our private memory word on the stack. 
*/ 240 int private_data_ptr; 241 /* For iterators. */ 242 struct sljit_label *matchingpath; 243} assert_backtrack; 244 245typedef struct bracket_backtrack { 246 backtrack_common common; 247 /* Where to coninue if an alternative is successfully matched. */ 248 struct sljit_label *alternative_matchingpath; 249 /* For rmin and rmax iterators. */ 250 struct sljit_label *recursive_matchingpath; 251 /* For greedy ? operator. */ 252 struct sljit_label *zero_matchingpath; 253 /* Contains the branches of a failed condition. */ 254 union { 255 /* Both for OP_COND, OP_SCOND. */ 256 jump_list *condfailed; 257 assert_backtrack *assert; 258 /* For OP_ONCE. Less than 0 if not needed. */ 259 int framesize; 260 } u; 261 /* Points to our private memory word on the stack. */ 262 int private_data_ptr; 263} bracket_backtrack; 264 265typedef struct bracketpos_backtrack { 266 backtrack_common common; 267 /* Points to our private memory word on the stack. */ 268 int private_data_ptr; 269 /* Reverting stack is needed. */ 270 int framesize; 271 /* Allocated stack size. */ 272 int stacksize; 273} bracketpos_backtrack; 274 275typedef struct braminzero_backtrack { 276 backtrack_common common; 277 struct sljit_label *matchingpath; 278} braminzero_backtrack; 279 280typedef struct iterator_backtrack { 281 backtrack_common common; 282 /* Next iteration. */ 283 struct sljit_label *matchingpath; 284} iterator_backtrack; 285 286typedef struct recurse_entry { 287 struct recurse_entry *next; 288 /* Contains the function entry. */ 289 struct sljit_label *entry; 290 /* Collects the calls until the function is not created. */ 291 jump_list *calls; 292 /* Points to the starting opcode. 
*/ 293 sljit_sw start; 294} recurse_entry; 295 296typedef struct recurse_backtrack { 297 backtrack_common common; 298 BOOL inlined_pattern; 299} recurse_backtrack; 300 301#define OP_THEN_TRAP OP_TABLE_LENGTH 302 303typedef struct then_trap_backtrack { 304 backtrack_common common; 305 /* If then_trap is not NULL, this structure contains the real 306 then_trap for the backtracking path. */ 307 struct then_trap_backtrack *then_trap; 308 /* Points to the starting opcode. */ 309 sljit_sw start; 310 /* Exit point for the then opcodes of this alternative. */ 311 jump_list *quit; 312 /* Frame size of the current alternative. */ 313 int framesize; 314} then_trap_backtrack; 315 316#define MAX_RANGE_SIZE 4 317 318typedef struct compiler_common { 319 /* The sljit ceneric compiler. */ 320 struct sljit_compiler *compiler; 321 /* First byte code. */ 322 pcre_uchar *start; 323 /* Maps private data offset to each opcode. */ 324 sljit_si *private_data_ptrs; 325 /* This read-only data is available during runtime. */ 326 sljit_uw *read_only_data; 327 /* The total size of the read-only data. */ 328 sljit_uw read_only_data_size; 329 /* The next free entry of the read_only_data. */ 330 sljit_uw *read_only_data_ptr; 331 /* Tells whether the capturing bracket is optimized. */ 332 pcre_uint8 *optimized_cbracket; 333 /* Tells whether the starting offset is a target of then. */ 334 pcre_uint8 *then_offsets; 335 /* Current position where a THEN must jump. */ 336 then_trap_backtrack *then_trap; 337 /* Starting offset of private data for capturing brackets. */ 338 int cbra_ptr; 339 /* Output vector starting point. Must be divisible by 2. */ 340 int ovector_start; 341 /* Last known position of the requested byte. */ 342 int req_char_ptr; 343 /* Head of the last recursion. */ 344 int recursive_head_ptr; 345 /* First inspected character for partial matching. */ 346 int start_used_ptr; 347 /* Starting pointer for partial soft matches. */ 348 int hit_start; 349 /* End pointer of the first line. 
*/ 350 int first_line_end; 351 /* Points to the marked string. */ 352 int mark_ptr; 353 /* Recursive control verb management chain. */ 354 int control_head_ptr; 355 /* Points to the last matched capture block index. */ 356 int capture_last_ptr; 357 /* Points to the starting position of the current match. */ 358 int start_ptr; 359 360 /* Flipped and lower case tables. */ 361 const pcre_uint8 *fcc; 362 sljit_sw lcc; 363 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */ 364 int mode; 365 /* TRUE, when minlength is greater than 0. */ 366 BOOL might_be_empty; 367 /* \K is found in the pattern. */ 368 BOOL has_set_som; 369 /* (*SKIP:arg) is found in the pattern. */ 370 BOOL has_skip_arg; 371 /* (*THEN) is found in the pattern. */ 372 BOOL has_then; 373 /* Needs to know the start position anytime. */ 374 BOOL needs_start_ptr; 375 /* Currently in recurse or negative assert. */ 376 BOOL local_exit; 377 /* Currently in a positive assert. */ 378 BOOL positive_assert; 379 /* Newline control. */ 380 int nltype; 381 pcre_uint32 nlmax; 382 pcre_uint32 nlmin; 383 int newline; 384 int bsr_nltype; 385 pcre_uint32 bsr_nlmax; 386 pcre_uint32 bsr_nlmin; 387 /* Dollar endonly. */ 388 int endonly; 389 /* Tables. */ 390 sljit_sw ctypes; 391 /* Named capturing brackets. */ 392 pcre_uchar *name_table; 393 sljit_sw name_count; 394 sljit_sw name_entry_size; 395 396 /* Labels and jump lists. 
*/ 397 struct sljit_label *partialmatchlabel; 398 struct sljit_label *quit_label; 399 struct sljit_label *forced_quit_label; 400 struct sljit_label *accept_label; 401 struct sljit_label *ff_newline_shortcut; 402 stub_list *stubs; 403 label_addr_list *label_addrs; 404 recurse_entry *entries; 405 recurse_entry *currententry; 406 jump_list *partialmatch; 407 jump_list *quit; 408 jump_list *positive_assert_quit; 409 jump_list *forced_quit; 410 jump_list *accept; 411 jump_list *calllimit; 412 jump_list *stackalloc; 413 jump_list *revertframes; 414 jump_list *wordboundary; 415 jump_list *anynewline; 416 jump_list *hspace; 417 jump_list *vspace; 418 jump_list *casefulcmp; 419 jump_list *caselesscmp; 420 jump_list *reset_match; 421 BOOL jscript_compat; 422#ifdef SUPPORT_UTF 423 BOOL utf; 424#ifdef SUPPORT_UCP 425 BOOL use_ucp; 426#endif 427#ifdef COMPILE_PCRE8 428 jump_list *utfreadchar; 429 jump_list *utfreadchar16; 430 jump_list *utfreadtype8; 431#endif 432#endif /* SUPPORT_UTF */ 433#ifdef SUPPORT_UCP 434 jump_list *getucd; 435#endif 436} compiler_common; 437 438/* For byte_sequence_compare. */ 439 440typedef struct compare_context { 441 int length; 442 int sourcereg; 443#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED 444 int ucharptr; 445 union { 446 sljit_si asint; 447 sljit_uh asushort; 448#if defined COMPILE_PCRE8 449 sljit_ub asbyte; 450 sljit_ub asuchars[4]; 451#elif defined COMPILE_PCRE16 452 sljit_uh asuchars[2]; 453#elif defined COMPILE_PCRE32 454 sljit_ui asuchars[1]; 455#endif 456 } c; 457 union { 458 sljit_si asint; 459 sljit_uh asushort; 460#if defined COMPILE_PCRE8 461 sljit_ub asbyte; 462 sljit_ub asuchars[4]; 463#elif defined COMPILE_PCRE16 464 sljit_uh asuchars[2]; 465#elif defined COMPILE_PCRE32 466 sljit_ui asuchars[1]; 467#endif 468 } oc; 469#endif 470} compare_context; 471 472/* Undefine sljit macros. */ 473#undef CMP 474 475/* Used for accessing the elements of the stack. 
*/ 476#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw)) 477 478#define TMP1 SLJIT_R0 479#define TMP2 SLJIT_R2 480#define TMP3 SLJIT_R3 481#define STR_PTR SLJIT_S0 482#define STR_END SLJIT_S1 483#define STACK_TOP SLJIT_R1 484#define STACK_LIMIT SLJIT_S2 485#define COUNT_MATCH SLJIT_S3 486#define ARGUMENTS SLJIT_S4 487#define RETURN_ADDR SLJIT_R4 488 489/* Local space layout. */ 490/* These two locals can be used by the current opcode. */ 491#define LOCALS0 (0 * sizeof(sljit_sw)) 492#define LOCALS1 (1 * sizeof(sljit_sw)) 493/* Two local variables for possessive quantifiers (char1 cannot use them). */ 494#define POSSESSIVE0 (2 * sizeof(sljit_sw)) 495#define POSSESSIVE1 (3 * sizeof(sljit_sw)) 496/* Max limit of recursions. */ 497#define LIMIT_MATCH (4 * sizeof(sljit_sw)) 498/* The output vector is stored on the stack, and contains pointers 499to characters. The vector data is divided into two groups: the first 500group contains the start / end character pointers, and the second is 501the start pointers when the end of the capturing group has not yet reached. */ 502#define OVECTOR_START (common->ovector_start) 503#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw)) 504#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw)) 505#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start]) 506 507#if defined COMPILE_PCRE8 508#define MOV_UCHAR SLJIT_MOV_UB 509#define MOVU_UCHAR SLJIT_MOVU_UB 510#elif defined COMPILE_PCRE16 511#define MOV_UCHAR SLJIT_MOV_UH 512#define MOVU_UCHAR SLJIT_MOVU_UH 513#elif defined COMPILE_PCRE32 514#define MOV_UCHAR SLJIT_MOV_UI 515#define MOVU_UCHAR SLJIT_MOVU_UI 516#else 517#error Unsupported compiling mode 518#endif 519 520/* Shortcuts. 
*/ 521#define DEFINE_COMPILER \ 522 struct sljit_compiler *compiler = common->compiler 523#define OP1(op, dst, dstw, src, srcw) \ 524 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw)) 525#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \ 526 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w)) 527#define LABEL() \ 528 sljit_emit_label(compiler) 529#define JUMP(type) \ 530 sljit_emit_jump(compiler, (type)) 531#define JUMPTO(type, label) \ 532 sljit_set_label(sljit_emit_jump(compiler, (type)), (label)) 533#define JUMPHERE(jump) \ 534 sljit_set_label((jump), sljit_emit_label(compiler)) 535#define SET_LABEL(jump, label) \ 536 sljit_set_label((jump), (label)) 537#define CMP(type, src1, src1w, src2, src2w) \ 538 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)) 539#define CMPTO(type, src1, src1w, src2, src2w, label) \ 540 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label)) 541#define OP_FLAGS(op, dst, dstw, src, srcw, type) \ 542 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type)) 543#define GET_LOCAL_BASE(dst, dstw, offset) \ 544 sljit_get_local_base(compiler, (dst), (dstw), (offset)) 545 546#define READ_CHAR_MAX 0x7fffffff 547 548static pcre_uchar* bracketend(pcre_uchar* cc) 549{ 550SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); 551do cc += GET(cc, 1); while (*cc == OP_ALT); 552SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); 553cc += 1 + LINK_SIZE; 554return cc; 555} 556 557static int no_alternatives(pcre_uchar* cc) 558{ 559int count = 0; 560SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); 561do 562 { 563 cc += GET(cc, 1); 564 count++; 565 } 566while (*cc == OP_ALT); 567SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); 568return count; 569} 570 571static int ones_in_half_byte[16] = { 572 /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3, 573 /* 8 */ 1, 
2, 2, 3, /* 12 */ 2, 3, 3, 4 574}; 575 576/* Functions whose might need modification for all new supported opcodes: 577 next_opcode 578 check_opcode_types 579 set_private_data_ptrs 580 get_framesize 581 init_frame 582 get_private_data_copy_length 583 copy_private_data 584 compile_matchingpath 585 compile_backtrackingpath 586*/ 587 588static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc) 589{ 590SLJIT_UNUSED_ARG(common); 591switch(*cc) 592 { 593 case OP_SOD: 594 case OP_SOM: 595 case OP_SET_SOM: 596 case OP_NOT_WORD_BOUNDARY: 597 case OP_WORD_BOUNDARY: 598 case OP_NOT_DIGIT: 599 case OP_DIGIT: 600 case OP_NOT_WHITESPACE: 601 case OP_WHITESPACE: 602 case OP_NOT_WORDCHAR: 603 case OP_WORDCHAR: 604 case OP_ANY: 605 case OP_ALLANY: 606 case OP_NOTPROP: 607 case OP_PROP: 608 case OP_ANYNL: 609 case OP_NOT_HSPACE: 610 case OP_HSPACE: 611 case OP_NOT_VSPACE: 612 case OP_VSPACE: 613 case OP_EXTUNI: 614 case OP_EODN: 615 case OP_EOD: 616 case OP_CIRC: 617 case OP_CIRCM: 618 case OP_DOLL: 619 case OP_DOLLM: 620 case OP_CRSTAR: 621 case OP_CRMINSTAR: 622 case OP_CRPLUS: 623 case OP_CRMINPLUS: 624 case OP_CRQUERY: 625 case OP_CRMINQUERY: 626 case OP_CRRANGE: 627 case OP_CRMINRANGE: 628 case OP_CRPOSSTAR: 629 case OP_CRPOSPLUS: 630 case OP_CRPOSQUERY: 631 case OP_CRPOSRANGE: 632 case OP_CLASS: 633 case OP_NCLASS: 634 case OP_REF: 635 case OP_REFI: 636 case OP_DNREF: 637 case OP_DNREFI: 638 case OP_RECURSE: 639 case OP_CALLOUT: 640 case OP_ALT: 641 case OP_KET: 642 case OP_KETRMAX: 643 case OP_KETRMIN: 644 case OP_KETRPOS: 645 case OP_REVERSE: 646 case OP_ASSERT: 647 case OP_ASSERT_NOT: 648 case OP_ASSERTBACK: 649 case OP_ASSERTBACK_NOT: 650 case OP_ONCE: 651 case OP_ONCE_NC: 652 case OP_BRA: 653 case OP_BRAPOS: 654 case OP_CBRA: 655 case OP_CBRAPOS: 656 case OP_COND: 657 case OP_SBRA: 658 case OP_SBRAPOS: 659 case OP_SCBRA: 660 case OP_SCBRAPOS: 661 case OP_SCOND: 662 case OP_CREF: 663 case OP_DNCREF: 664 case OP_RREF: 665 case OP_DNRREF: 666 case OP_DEF: 667 
case OP_BRAZERO: 668 case OP_BRAMINZERO: 669 case OP_BRAPOSZERO: 670 case OP_PRUNE: 671 case OP_SKIP: 672 case OP_THEN: 673 case OP_COMMIT: 674 case OP_FAIL: 675 case OP_ACCEPT: 676 case OP_ASSERT_ACCEPT: 677 case OP_CLOSE: 678 case OP_SKIPZERO: 679 return cc + PRIV(OP_lengths)[*cc]; 680 681 case OP_CHAR: 682 case OP_CHARI: 683 case OP_NOT: 684 case OP_NOTI: 685 case OP_STAR: 686 case OP_MINSTAR: 687 case OP_PLUS: 688 case OP_MINPLUS: 689 case OP_QUERY: 690 case OP_MINQUERY: 691 case OP_UPTO: 692 case OP_MINUPTO: 693 case OP_EXACT: 694 case OP_POSSTAR: 695 case OP_POSPLUS: 696 case OP_POSQUERY: 697 case OP_POSUPTO: 698 case OP_STARI: 699 case OP_MINSTARI: 700 case OP_PLUSI: 701 case OP_MINPLUSI: 702 case OP_QUERYI: 703 case OP_MINQUERYI: 704 case OP_UPTOI: 705 case OP_MINUPTOI: 706 case OP_EXACTI: 707 case OP_POSSTARI: 708 case OP_POSPLUSI: 709 case OP_POSQUERYI: 710 case OP_POSUPTOI: 711 case OP_NOTSTAR: 712 case OP_NOTMINSTAR: 713 case OP_NOTPLUS: 714 case OP_NOTMINPLUS: 715 case OP_NOTQUERY: 716 case OP_NOTMINQUERY: 717 case OP_NOTUPTO: 718 case OP_NOTMINUPTO: 719 case OP_NOTEXACT: 720 case OP_NOTPOSSTAR: 721 case OP_NOTPOSPLUS: 722 case OP_NOTPOSQUERY: 723 case OP_NOTPOSUPTO: 724 case OP_NOTSTARI: 725 case OP_NOTMINSTARI: 726 case OP_NOTPLUSI: 727 case OP_NOTMINPLUSI: 728 case OP_NOTQUERYI: 729 case OP_NOTMINQUERYI: 730 case OP_NOTUPTOI: 731 case OP_NOTMINUPTOI: 732 case OP_NOTEXACTI: 733 case OP_NOTPOSSTARI: 734 case OP_NOTPOSPLUSI: 735 case OP_NOTPOSQUERYI: 736 case OP_NOTPOSUPTOI: 737 cc += PRIV(OP_lengths)[*cc]; 738#ifdef SUPPORT_UTF 739 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 740#endif 741 return cc; 742 743 /* Special cases. 
*/ 744 case OP_TYPESTAR: 745 case OP_TYPEMINSTAR: 746 case OP_TYPEPLUS: 747 case OP_TYPEMINPLUS: 748 case OP_TYPEQUERY: 749 case OP_TYPEMINQUERY: 750 case OP_TYPEUPTO: 751 case OP_TYPEMINUPTO: 752 case OP_TYPEEXACT: 753 case OP_TYPEPOSSTAR: 754 case OP_TYPEPOSPLUS: 755 case OP_TYPEPOSQUERY: 756 case OP_TYPEPOSUPTO: 757 return cc + PRIV(OP_lengths)[*cc] - 1; 758 759 case OP_ANYBYTE: 760#ifdef SUPPORT_UTF 761 if (common->utf) return NULL; 762#endif 763 return cc + 1; 764 765#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 766 case OP_XCLASS: 767 return cc + GET(cc, 1); 768#endif 769 770 case OP_MARK: 771 case OP_PRUNE_ARG: 772 case OP_SKIP_ARG: 773 case OP_THEN_ARG: 774 return cc + 1 + 2 + cc[1]; 775 776 default: 777 /* All opcodes are supported now! */ 778 SLJIT_ASSERT_STOP(); 779 return NULL; 780 } 781} 782 783static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) 784{ 785int count; 786pcre_uchar *slot; 787 788/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */ 789while (cc < ccend) 790 { 791 switch(*cc) 792 { 793 case OP_SET_SOM: 794 common->has_set_som = TRUE; 795 common->might_be_empty = TRUE; 796 cc += 1; 797 break; 798 799 case OP_REF: 800 case OP_REFI: 801 common->optimized_cbracket[GET2(cc, 1)] = 0; 802 cc += 1 + IMM2_SIZE; 803 break; 804 805 case OP_BRA: 806 case OP_CBRA: 807 case OP_SBRA: 808 case OP_SCBRA: 809 count = no_alternatives(cc); 810 if (count > 4) 811 common->read_only_data_size += count * sizeof(sljit_uw); 812 cc += 1 + LINK_SIZE + (*cc == OP_CBRA || *cc == OP_SCBRA ? IMM2_SIZE : 0); 813 break; 814 815 case OP_CBRAPOS: 816 case OP_SCBRAPOS: 817 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0; 818 cc += 1 + LINK_SIZE + IMM2_SIZE; 819 break; 820 821 case OP_COND: 822 case OP_SCOND: 823 /* Only AUTO_CALLOUT can insert this opcode. We do 824 not intend to support this case. 
*/ 825 if (cc[1 + LINK_SIZE] == OP_CALLOUT) 826 return FALSE; 827 cc += 1 + LINK_SIZE; 828 break; 829 830 case OP_CREF: 831 common->optimized_cbracket[GET2(cc, 1)] = 0; 832 cc += 1 + IMM2_SIZE; 833 break; 834 835 case OP_DNREF: 836 case OP_DNREFI: 837 case OP_DNCREF: 838 count = GET2(cc, 1 + IMM2_SIZE); 839 slot = common->name_table + GET2(cc, 1) * common->name_entry_size; 840 while (count-- > 0) 841 { 842 common->optimized_cbracket[GET2(slot, 0)] = 0; 843 slot += common->name_entry_size; 844 } 845 cc += 1 + 2 * IMM2_SIZE; 846 break; 847 848 case OP_RECURSE: 849 /* Set its value only once. */ 850 if (common->recursive_head_ptr == 0) 851 { 852 common->recursive_head_ptr = common->ovector_start; 853 common->ovector_start += sizeof(sljit_sw); 854 } 855 cc += 1 + LINK_SIZE; 856 break; 857 858 case OP_CALLOUT: 859 if (common->capture_last_ptr == 0) 860 { 861 common->capture_last_ptr = common->ovector_start; 862 common->ovector_start += sizeof(sljit_sw); 863 } 864 cc += 2 + 2 * LINK_SIZE; 865 break; 866 867 case OP_THEN_ARG: 868 common->has_then = TRUE; 869 common->control_head_ptr = 1; 870 /* Fall through. */ 871 872 case OP_PRUNE_ARG: 873 common->needs_start_ptr = TRUE; 874 /* Fall through. */ 875 876 case OP_MARK: 877 if (common->mark_ptr == 0) 878 { 879 common->mark_ptr = common->ovector_start; 880 common->ovector_start += sizeof(sljit_sw); 881 } 882 cc += 1 + 2 + cc[1]; 883 break; 884 885 case OP_THEN: 886 common->has_then = TRUE; 887 common->control_head_ptr = 1; 888 /* Fall through. 
*/ 889 890 case OP_PRUNE: 891 case OP_SKIP: 892 common->needs_start_ptr = TRUE; 893 cc += 1; 894 break; 895 896 case OP_SKIP_ARG: 897 common->control_head_ptr = 1; 898 common->has_skip_arg = TRUE; 899 cc += 1 + 2 + cc[1]; 900 break; 901 902 default: 903 cc = next_opcode(common, cc); 904 if (cc == NULL) 905 return FALSE; 906 break; 907 } 908 } 909return TRUE; 910} 911 912static int get_class_iterator_size(pcre_uchar *cc) 913{ 914switch(*cc) 915 { 916 case OP_CRSTAR: 917 case OP_CRPLUS: 918 return 2; 919 920 case OP_CRMINSTAR: 921 case OP_CRMINPLUS: 922 case OP_CRQUERY: 923 case OP_CRMINQUERY: 924 return 1; 925 926 case OP_CRRANGE: 927 case OP_CRMINRANGE: 928 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE)) 929 return 0; 930 return 2; 931 932 default: 933 return 0; 934 } 935} 936 937static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin) 938{ 939pcre_uchar *end = bracketend(begin); 940pcre_uchar *next; 941pcre_uchar *next_end; 942pcre_uchar *max_end; 943pcre_uchar type; 944sljit_sw length = end - begin; 945int min, max, i; 946 947/* Detect fixed iterations first. */ 948if (end[-(1 + LINK_SIZE)] != OP_KET) 949 return FALSE; 950 951/* Already detected repeat. 
*/ 952if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0) 953 return TRUE; 954 955next = end; 956min = 1; 957while (1) 958 { 959 if (*next != *begin) 960 break; 961 next_end = bracketend(next); 962 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0) 963 break; 964 next = next_end; 965 min++; 966 } 967 968if (min == 2) 969 return FALSE; 970 971max = 0; 972max_end = next; 973if (*next == OP_BRAZERO || *next == OP_BRAMINZERO) 974 { 975 type = *next; 976 while (1) 977 { 978 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin) 979 break; 980 next_end = bracketend(next + 2 + LINK_SIZE); 981 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0) 982 break; 983 next = next_end; 984 max++; 985 } 986 987 if (next[0] == type && next[1] == *begin && max >= 1) 988 { 989 next_end = bracketend(next + 1); 990 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0) 991 { 992 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE) 993 if (*next_end != OP_KET) 994 break; 995 996 if (i == max) 997 { 998 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end; 999 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO; 1000 /* +2 the original and the last. 
*/ 1001 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2; 1002 if (min == 1) 1003 return TRUE; 1004 min--; 1005 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE); 1006 } 1007 } 1008 } 1009 } 1010 1011if (min >= 3) 1012 { 1013 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end; 1014 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT; 1015 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min; 1016 return TRUE; 1017 } 1018 1019return FALSE; 1020} 1021 1022#define CASE_ITERATOR_PRIVATE_DATA_1 \ 1023 case OP_MINSTAR: \ 1024 case OP_MINPLUS: \ 1025 case OP_QUERY: \ 1026 case OP_MINQUERY: \ 1027 case OP_MINSTARI: \ 1028 case OP_MINPLUSI: \ 1029 case OP_QUERYI: \ 1030 case OP_MINQUERYI: \ 1031 case OP_NOTMINSTAR: \ 1032 case OP_NOTMINPLUS: \ 1033 case OP_NOTQUERY: \ 1034 case OP_NOTMINQUERY: \ 1035 case OP_NOTMINSTARI: \ 1036 case OP_NOTMINPLUSI: \ 1037 case OP_NOTQUERYI: \ 1038 case OP_NOTMINQUERYI: 1039 1040#define CASE_ITERATOR_PRIVATE_DATA_2A \ 1041 case OP_STAR: \ 1042 case OP_PLUS: \ 1043 case OP_STARI: \ 1044 case OP_PLUSI: \ 1045 case OP_NOTSTAR: \ 1046 case OP_NOTPLUS: \ 1047 case OP_NOTSTARI: \ 1048 case OP_NOTPLUSI: 1049 1050#define CASE_ITERATOR_PRIVATE_DATA_2B \ 1051 case OP_UPTO: \ 1052 case OP_MINUPTO: \ 1053 case OP_UPTOI: \ 1054 case OP_MINUPTOI: \ 1055 case OP_NOTUPTO: \ 1056 case OP_NOTMINUPTO: \ 1057 case OP_NOTUPTOI: \ 1058 case OP_NOTMINUPTOI: 1059 1060#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \ 1061 case OP_TYPEMINSTAR: \ 1062 case OP_TYPEMINPLUS: \ 1063 case OP_TYPEQUERY: \ 1064 case OP_TYPEMINQUERY: 1065 1066#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \ 1067 case OP_TYPESTAR: \ 1068 case OP_TYPEPLUS: 1069 1070#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \ 1071 case OP_TYPEUPTO: \ 1072 case OP_TYPEMINUPTO: 1073 1074static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend) 1075{ 1076pcre_uchar *cc = common->start; 
1077pcre_uchar *alternative; 1078pcre_uchar *end = NULL; 1079int private_data_ptr = *private_data_start; 1080int space, size, bracketlen; 1081 1082while (cc < ccend) 1083 { 1084 space = 0; 1085 size = 0; 1086 bracketlen = 0; 1087 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE) 1088 return; 1089 1090 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND) 1091 if (detect_repeat(common, cc)) 1092 { 1093 /* These brackets are converted to repeats, so no global 1094 based single character repeat is allowed. */ 1095 if (cc >= end) 1096 end = bracketend(cc); 1097 } 1098 1099 switch(*cc) 1100 { 1101 case OP_KET: 1102 if (common->private_data_ptrs[cc + 1 - common->start] != 0) 1103 { 1104 common->private_data_ptrs[cc - common->start] = private_data_ptr; 1105 private_data_ptr += sizeof(sljit_sw); 1106 cc += common->private_data_ptrs[cc + 1 - common->start]; 1107 } 1108 cc += 1 + LINK_SIZE; 1109 break; 1110 1111 case OP_ASSERT: 1112 case OP_ASSERT_NOT: 1113 case OP_ASSERTBACK: 1114 case OP_ASSERTBACK_NOT: 1115 case OP_ONCE: 1116 case OP_ONCE_NC: 1117 case OP_BRAPOS: 1118 case OP_SBRA: 1119 case OP_SBRAPOS: 1120 case OP_SCOND: 1121 common->private_data_ptrs[cc - common->start] = private_data_ptr; 1122 private_data_ptr += sizeof(sljit_sw); 1123 bracketlen = 1 + LINK_SIZE; 1124 break; 1125 1126 case OP_CBRAPOS: 1127 case OP_SCBRAPOS: 1128 common->private_data_ptrs[cc - common->start] = private_data_ptr; 1129 private_data_ptr += sizeof(sljit_sw); 1130 bracketlen = 1 + LINK_SIZE + IMM2_SIZE; 1131 break; 1132 1133 case OP_COND: 1134 /* Might be a hidden SCOND. 
*/ 1135 alternative = cc + GET(cc, 1); 1136 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) 1137 { 1138 common->private_data_ptrs[cc - common->start] = private_data_ptr; 1139 private_data_ptr += sizeof(sljit_sw); 1140 } 1141 bracketlen = 1 + LINK_SIZE; 1142 break; 1143 1144 case OP_BRA: 1145 bracketlen = 1 + LINK_SIZE; 1146 break; 1147 1148 case OP_CBRA: 1149 case OP_SCBRA: 1150 bracketlen = 1 + LINK_SIZE + IMM2_SIZE; 1151 break; 1152 1153 CASE_ITERATOR_PRIVATE_DATA_1 1154 space = 1; 1155 size = -2; 1156 break; 1157 1158 CASE_ITERATOR_PRIVATE_DATA_2A 1159 space = 2; 1160 size = -2; 1161 break; 1162 1163 CASE_ITERATOR_PRIVATE_DATA_2B 1164 space = 2; 1165 size = -(2 + IMM2_SIZE); 1166 break; 1167 1168 CASE_ITERATOR_TYPE_PRIVATE_DATA_1 1169 space = 1; 1170 size = 1; 1171 break; 1172 1173 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A 1174 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI) 1175 space = 2; 1176 size = 1; 1177 break; 1178 1179 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B 1180 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI) 1181 space = 2; 1182 size = 1 + IMM2_SIZE; 1183 break; 1184 1185 case OP_CLASS: 1186 case OP_NCLASS: 1187 size += 1 + 32 / sizeof(pcre_uchar); 1188 space = get_class_iterator_size(cc + size); 1189 break; 1190 1191#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 1192 case OP_XCLASS: 1193 size = GET(cc, 1); 1194 space = get_class_iterator_size(cc + size); 1195 break; 1196#endif 1197 1198 default: 1199 cc = next_opcode(common, cc); 1200 SLJIT_ASSERT(cc != NULL); 1201 break; 1202 } 1203 1204 /* Character iterators, which are not inside a repeated bracket, 1205 gets a private slot instead of allocating it on the stack. 
*/ 1206 if (space > 0 && cc >= end) 1207 { 1208 common->private_data_ptrs[cc - common->start] = private_data_ptr; 1209 private_data_ptr += sizeof(sljit_sw) * space; 1210 } 1211 1212 if (size != 0) 1213 { 1214 if (size < 0) 1215 { 1216 cc += -size; 1217#ifdef SUPPORT_UTF 1218 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1219#endif 1220 } 1221 else 1222 cc += size; 1223 } 1224 1225 if (bracketlen > 0) 1226 { 1227 if (cc >= end) 1228 { 1229 end = bracketend(cc); 1230 if (end[-1 - LINK_SIZE] == OP_KET) 1231 end = NULL; 1232 } 1233 cc += bracketlen; 1234 } 1235 } 1236*private_data_start = private_data_ptr; 1237} 1238 1239/* Returns with a frame_types (always < 0) if no need for frame. */ 1240static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head) 1241{ 1242int length = 0; 1243int possessive = 0; 1244BOOL stack_restore = FALSE; 1245BOOL setsom_found = recursive; 1246BOOL setmark_found = recursive; 1247/* The last capture is a local variable even for recursions. */ 1248BOOL capture_last_found = FALSE; 1249 1250#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD 1251SLJIT_ASSERT(common->control_head_ptr != 0); 1252*needs_control_head = TRUE; 1253#else 1254*needs_control_head = FALSE; 1255#endif 1256 1257if (ccend == NULL) 1258 { 1259 ccend = bracketend(cc) - (1 + LINK_SIZE); 1260 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)) 1261 { 1262 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3; 1263 /* This is correct regardless of common->capture_last_ptr. 
*/ 1264 capture_last_found = TRUE; 1265 } 1266 cc = next_opcode(common, cc); 1267 } 1268 1269SLJIT_ASSERT(cc != NULL); 1270while (cc < ccend) 1271 switch(*cc) 1272 { 1273 case OP_SET_SOM: 1274 SLJIT_ASSERT(common->has_set_som); 1275 stack_restore = TRUE; 1276 if (!setsom_found) 1277 { 1278 length += 2; 1279 setsom_found = TRUE; 1280 } 1281 cc += 1; 1282 break; 1283 1284 case OP_MARK: 1285 case OP_PRUNE_ARG: 1286 case OP_THEN_ARG: 1287 SLJIT_ASSERT(common->mark_ptr != 0); 1288 stack_restore = TRUE; 1289 if (!setmark_found) 1290 { 1291 length += 2; 1292 setmark_found = TRUE; 1293 } 1294 if (common->control_head_ptr != 0) 1295 *needs_control_head = TRUE; 1296 cc += 1 + 2 + cc[1]; 1297 break; 1298 1299 case OP_RECURSE: 1300 stack_restore = TRUE; 1301 if (common->has_set_som && !setsom_found) 1302 { 1303 length += 2; 1304 setsom_found = TRUE; 1305 } 1306 if (common->mark_ptr != 0 && !setmark_found) 1307 { 1308 length += 2; 1309 setmark_found = TRUE; 1310 } 1311 if (common->capture_last_ptr != 0 && !capture_last_found) 1312 { 1313 length += 2; 1314 capture_last_found = TRUE; 1315 } 1316 cc += 1 + LINK_SIZE; 1317 break; 1318 1319 case OP_CBRA: 1320 case OP_CBRAPOS: 1321 case OP_SCBRA: 1322 case OP_SCBRAPOS: 1323 stack_restore = TRUE; 1324 if (common->capture_last_ptr != 0 && !capture_last_found) 1325 { 1326 length += 2; 1327 capture_last_found = TRUE; 1328 } 1329 length += 3; 1330 cc += 1 + LINK_SIZE + IMM2_SIZE; 1331 break; 1332 1333 default: 1334 stack_restore = TRUE; 1335 /* Fall through. 
*/ 1336 1337 case OP_NOT_WORD_BOUNDARY: 1338 case OP_WORD_BOUNDARY: 1339 case OP_NOT_DIGIT: 1340 case OP_DIGIT: 1341 case OP_NOT_WHITESPACE: 1342 case OP_WHITESPACE: 1343 case OP_NOT_WORDCHAR: 1344 case OP_WORDCHAR: 1345 case OP_ANY: 1346 case OP_ALLANY: 1347 case OP_ANYBYTE: 1348 case OP_NOTPROP: 1349 case OP_PROP: 1350 case OP_ANYNL: 1351 case OP_NOT_HSPACE: 1352 case OP_HSPACE: 1353 case OP_NOT_VSPACE: 1354 case OP_VSPACE: 1355 case OP_EXTUNI: 1356 case OP_EODN: 1357 case OP_EOD: 1358 case OP_CIRC: 1359 case OP_CIRCM: 1360 case OP_DOLL: 1361 case OP_DOLLM: 1362 case OP_CHAR: 1363 case OP_CHARI: 1364 case OP_NOT: 1365 case OP_NOTI: 1366 1367 case OP_EXACT: 1368 case OP_POSSTAR: 1369 case OP_POSPLUS: 1370 case OP_POSQUERY: 1371 case OP_POSUPTO: 1372 1373 case OP_EXACTI: 1374 case OP_POSSTARI: 1375 case OP_POSPLUSI: 1376 case OP_POSQUERYI: 1377 case OP_POSUPTOI: 1378 1379 case OP_NOTEXACT: 1380 case OP_NOTPOSSTAR: 1381 case OP_NOTPOSPLUS: 1382 case OP_NOTPOSQUERY: 1383 case OP_NOTPOSUPTO: 1384 1385 case OP_NOTEXACTI: 1386 case OP_NOTPOSSTARI: 1387 case OP_NOTPOSPLUSI: 1388 case OP_NOTPOSQUERYI: 1389 case OP_NOTPOSUPTOI: 1390 1391 case OP_TYPEEXACT: 1392 case OP_TYPEPOSSTAR: 1393 case OP_TYPEPOSPLUS: 1394 case OP_TYPEPOSQUERY: 1395 case OP_TYPEPOSUPTO: 1396 1397 case OP_CLASS: 1398 case OP_NCLASS: 1399 case OP_XCLASS: 1400 1401 cc = next_opcode(common, cc); 1402 SLJIT_ASSERT(cc != NULL); 1403 break; 1404 } 1405 1406/* Possessive quantifiers can use a special case. */ 1407if (SLJIT_UNLIKELY(possessive == length)) 1408 return stack_restore ? no_frame : no_stack; 1409 1410if (length > 0) 1411 return length + 1; 1412return stack_restore ? no_frame : no_stack; 1413} 1414 1415static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive) 1416{ 1417DEFINE_COMPILER; 1418BOOL setsom_found = recursive; 1419BOOL setmark_found = recursive; 1420/* The last capture is a local variable even for recursions. 
*/ 1421BOOL capture_last_found = FALSE; 1422int offset; 1423 1424/* >= 1 + shortest item size (2) */ 1425SLJIT_UNUSED_ARG(stacktop); 1426SLJIT_ASSERT(stackpos >= stacktop + 2); 1427 1428stackpos = STACK(stackpos); 1429if (ccend == NULL) 1430 { 1431 ccend = bracketend(cc) - (1 + LINK_SIZE); 1432 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)) 1433 cc = next_opcode(common, cc); 1434 } 1435 1436SLJIT_ASSERT(cc != NULL); 1437while (cc < ccend) 1438 switch(*cc) 1439 { 1440 case OP_SET_SOM: 1441 SLJIT_ASSERT(common->has_set_som); 1442 if (!setsom_found) 1443 { 1444 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); 1445 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); 1446 stackpos += (int)sizeof(sljit_sw); 1447 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); 1448 stackpos += (int)sizeof(sljit_sw); 1449 setsom_found = TRUE; 1450 } 1451 cc += 1; 1452 break; 1453 1454 case OP_MARK: 1455 case OP_PRUNE_ARG: 1456 case OP_THEN_ARG: 1457 SLJIT_ASSERT(common->mark_ptr != 0); 1458 if (!setmark_found) 1459 { 1460 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); 1461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); 1462 stackpos += (int)sizeof(sljit_sw); 1463 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); 1464 stackpos += (int)sizeof(sljit_sw); 1465 setmark_found = TRUE; 1466 } 1467 cc += 1 + 2 + cc[1]; 1468 break; 1469 1470 case OP_RECURSE: 1471 if (common->has_set_som && !setsom_found) 1472 { 1473 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); 1474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); 1475 stackpos += (int)sizeof(sljit_sw); 1476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); 1477 stackpos += (int)sizeof(sljit_sw); 1478 setsom_found = TRUE; 1479 } 1480 if (common->mark_ptr != 0 && !setmark_found) 1481 { 1482 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); 1483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), 
stackpos, SLJIT_IMM, -common->mark_ptr); 1484 stackpos += (int)sizeof(sljit_sw); 1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); 1486 stackpos += (int)sizeof(sljit_sw); 1487 setmark_found = TRUE; 1488 } 1489 if (common->capture_last_ptr != 0 && !capture_last_found) 1490 { 1491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); 1492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); 1493 stackpos += (int)sizeof(sljit_sw); 1494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); 1495 stackpos += (int)sizeof(sljit_sw); 1496 capture_last_found = TRUE; 1497 } 1498 cc += 1 + LINK_SIZE; 1499 break; 1500 1501 case OP_CBRA: 1502 case OP_CBRAPOS: 1503 case OP_SCBRA: 1504 case OP_SCBRAPOS: 1505 if (common->capture_last_ptr != 0 && !capture_last_found) 1506 { 1507 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); 1508 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); 1509 stackpos += (int)sizeof(sljit_sw); 1510 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); 1511 stackpos += (int)sizeof(sljit_sw); 1512 capture_last_found = TRUE; 1513 } 1514 offset = (GET2(cc, 1 + LINK_SIZE)) << 1; 1515 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset)); 1516 stackpos += (int)sizeof(sljit_sw); 1517 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); 1518 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); 1519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); 1520 stackpos += (int)sizeof(sljit_sw); 1521 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); 1522 stackpos += (int)sizeof(sljit_sw); 1523 1524 cc += 1 + LINK_SIZE + IMM2_SIZE; 1525 break; 1526 1527 default: 1528 cc = next_opcode(common, cc); 1529 SLJIT_ASSERT(cc != NULL); 1530 break; 1531 } 1532 1533OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0); 1534SLJIT_ASSERT(stackpos == STACK(stacktop)); 1535} 1536 
1537static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head) 1538{ 1539int private_data_length = needs_control_head ? 3 : 2; 1540int size; 1541pcre_uchar *alternative; 1542/* Calculate the sum of the private machine words. */ 1543while (cc < ccend) 1544 { 1545 size = 0; 1546 switch(*cc) 1547 { 1548 case OP_KET: 1549 if (PRIVATE_DATA(cc) != 0) 1550 private_data_length++; 1551 cc += 1 + LINK_SIZE; 1552 break; 1553 1554 case OP_ASSERT: 1555 case OP_ASSERT_NOT: 1556 case OP_ASSERTBACK: 1557 case OP_ASSERTBACK_NOT: 1558 case OP_ONCE: 1559 case OP_ONCE_NC: 1560 case OP_BRAPOS: 1561 case OP_SBRA: 1562 case OP_SBRAPOS: 1563 case OP_SCOND: 1564 private_data_length++; 1565 cc += 1 + LINK_SIZE; 1566 break; 1567 1568 case OP_CBRA: 1569 case OP_SCBRA: 1570 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) 1571 private_data_length++; 1572 cc += 1 + LINK_SIZE + IMM2_SIZE; 1573 break; 1574 1575 case OP_CBRAPOS: 1576 case OP_SCBRAPOS: 1577 private_data_length += 2; 1578 cc += 1 + LINK_SIZE + IMM2_SIZE; 1579 break; 1580 1581 case OP_COND: 1582 /* Might be a hidden SCOND. 
*/ 1583 alternative = cc + GET(cc, 1); 1584 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) 1585 private_data_length++; 1586 cc += 1 + LINK_SIZE; 1587 break; 1588 1589 CASE_ITERATOR_PRIVATE_DATA_1 1590 if (PRIVATE_DATA(cc)) 1591 private_data_length++; 1592 cc += 2; 1593#ifdef SUPPORT_UTF 1594 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1595#endif 1596 break; 1597 1598 CASE_ITERATOR_PRIVATE_DATA_2A 1599 if (PRIVATE_DATA(cc)) 1600 private_data_length += 2; 1601 cc += 2; 1602#ifdef SUPPORT_UTF 1603 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1604#endif 1605 break; 1606 1607 CASE_ITERATOR_PRIVATE_DATA_2B 1608 if (PRIVATE_DATA(cc)) 1609 private_data_length += 2; 1610 cc += 2 + IMM2_SIZE; 1611#ifdef SUPPORT_UTF 1612 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1613#endif 1614 break; 1615 1616 CASE_ITERATOR_TYPE_PRIVATE_DATA_1 1617 if (PRIVATE_DATA(cc)) 1618 private_data_length++; 1619 cc += 1; 1620 break; 1621 1622 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A 1623 if (PRIVATE_DATA(cc)) 1624 private_data_length += 2; 1625 cc += 1; 1626 break; 1627 1628 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B 1629 if (PRIVATE_DATA(cc)) 1630 private_data_length += 2; 1631 cc += 1 + IMM2_SIZE; 1632 break; 1633 1634 case OP_CLASS: 1635 case OP_NCLASS: 1636#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 1637 case OP_XCLASS: 1638 size = (*cc == OP_XCLASS) ? 
GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar); 1639#else 1640 size = 1 + 32 / (int)sizeof(pcre_uchar); 1641#endif 1642 if (PRIVATE_DATA(cc)) 1643 private_data_length += get_class_iterator_size(cc + size); 1644 cc += size; 1645 break; 1646 1647 default: 1648 cc = next_opcode(common, cc); 1649 SLJIT_ASSERT(cc != NULL); 1650 break; 1651 } 1652 } 1653SLJIT_ASSERT(cc == ccend); 1654return private_data_length; 1655} 1656 1657static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, 1658 BOOL save, int stackptr, int stacktop, BOOL needs_control_head) 1659{ 1660DEFINE_COMPILER; 1661int srcw[2]; 1662int count, size; 1663BOOL tmp1next = TRUE; 1664BOOL tmp1empty = TRUE; 1665BOOL tmp2empty = TRUE; 1666pcre_uchar *alternative; 1667enum { 1668 start, 1669 loop, 1670 end 1671} status; 1672 1673status = save ? start : loop; 1674stackptr = STACK(stackptr - 2); 1675stacktop = STACK(stacktop - 1); 1676 1677if (!save) 1678 { 1679 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw); 1680 if (stackptr < stacktop) 1681 { 1682 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr); 1683 stackptr += sizeof(sljit_sw); 1684 tmp1empty = FALSE; 1685 } 1686 if (stackptr < stacktop) 1687 { 1688 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr); 1689 stackptr += sizeof(sljit_sw); 1690 tmp2empty = FALSE; 1691 } 1692 /* The tmp1next must be TRUE in either way. 
*/ 1693 } 1694 1695do 1696 { 1697 count = 0; 1698 switch(status) 1699 { 1700 case start: 1701 SLJIT_ASSERT(save && common->recursive_head_ptr != 0); 1702 count = 1; 1703 srcw[0] = common->recursive_head_ptr; 1704 if (needs_control_head) 1705 { 1706 SLJIT_ASSERT(common->control_head_ptr != 0); 1707 count = 2; 1708 srcw[1] = common->control_head_ptr; 1709 } 1710 status = loop; 1711 break; 1712 1713 case loop: 1714 if (cc >= ccend) 1715 { 1716 status = end; 1717 break; 1718 } 1719 1720 switch(*cc) 1721 { 1722 case OP_KET: 1723 if (PRIVATE_DATA(cc) != 0) 1724 { 1725 count = 1; 1726 srcw[0] = PRIVATE_DATA(cc); 1727 } 1728 cc += 1 + LINK_SIZE; 1729 break; 1730 1731 case OP_ASSERT: 1732 case OP_ASSERT_NOT: 1733 case OP_ASSERTBACK: 1734 case OP_ASSERTBACK_NOT: 1735 case OP_ONCE: 1736 case OP_ONCE_NC: 1737 case OP_BRAPOS: 1738 case OP_SBRA: 1739 case OP_SBRAPOS: 1740 case OP_SCOND: 1741 count = 1; 1742 srcw[0] = PRIVATE_DATA(cc); 1743 SLJIT_ASSERT(srcw[0] != 0); 1744 cc += 1 + LINK_SIZE; 1745 break; 1746 1747 case OP_CBRA: 1748 case OP_SCBRA: 1749 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) 1750 { 1751 count = 1; 1752 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); 1753 } 1754 cc += 1 + LINK_SIZE + IMM2_SIZE; 1755 break; 1756 1757 case OP_CBRAPOS: 1758 case OP_SCBRAPOS: 1759 count = 2; 1760 srcw[0] = PRIVATE_DATA(cc); 1761 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); 1762 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0); 1763 cc += 1 + LINK_SIZE + IMM2_SIZE; 1764 break; 1765 1766 case OP_COND: 1767 /* Might be a hidden SCOND. 
*/ 1768 alternative = cc + GET(cc, 1); 1769 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) 1770 { 1771 count = 1; 1772 srcw[0] = PRIVATE_DATA(cc); 1773 SLJIT_ASSERT(srcw[0] != 0); 1774 } 1775 cc += 1 + LINK_SIZE; 1776 break; 1777 1778 CASE_ITERATOR_PRIVATE_DATA_1 1779 if (PRIVATE_DATA(cc)) 1780 { 1781 count = 1; 1782 srcw[0] = PRIVATE_DATA(cc); 1783 } 1784 cc += 2; 1785#ifdef SUPPORT_UTF 1786 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1787#endif 1788 break; 1789 1790 CASE_ITERATOR_PRIVATE_DATA_2A 1791 if (PRIVATE_DATA(cc)) 1792 { 1793 count = 2; 1794 srcw[0] = PRIVATE_DATA(cc); 1795 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); 1796 } 1797 cc += 2; 1798#ifdef SUPPORT_UTF 1799 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1800#endif 1801 break; 1802 1803 CASE_ITERATOR_PRIVATE_DATA_2B 1804 if (PRIVATE_DATA(cc)) 1805 { 1806 count = 2; 1807 srcw[0] = PRIVATE_DATA(cc); 1808 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); 1809 } 1810 cc += 2 + IMM2_SIZE; 1811#ifdef SUPPORT_UTF 1812 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1813#endif 1814 break; 1815 1816 CASE_ITERATOR_TYPE_PRIVATE_DATA_1 1817 if (PRIVATE_DATA(cc)) 1818 { 1819 count = 1; 1820 srcw[0] = PRIVATE_DATA(cc); 1821 } 1822 cc += 1; 1823 break; 1824 1825 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A 1826 if (PRIVATE_DATA(cc)) 1827 { 1828 count = 2; 1829 srcw[0] = PRIVATE_DATA(cc); 1830 srcw[1] = srcw[0] + sizeof(sljit_sw); 1831 } 1832 cc += 1; 1833 break; 1834 1835 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B 1836 if (PRIVATE_DATA(cc)) 1837 { 1838 count = 2; 1839 srcw[0] = PRIVATE_DATA(cc); 1840 srcw[1] = srcw[0] + sizeof(sljit_sw); 1841 } 1842 cc += 1 + IMM2_SIZE; 1843 break; 1844 1845 case OP_CLASS: 1846 case OP_NCLASS: 1847#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 1848 case OP_XCLASS: 1849 size = (*cc == OP_XCLASS) ? 
GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar); 1850#else 1851 size = 1 + 32 / (int)sizeof(pcre_uchar); 1852#endif 1853 if (PRIVATE_DATA(cc)) 1854 switch(get_class_iterator_size(cc + size)) 1855 { 1856 case 1: 1857 count = 1; 1858 srcw[0] = PRIVATE_DATA(cc); 1859 break; 1860 1861 case 2: 1862 count = 2; 1863 srcw[0] = PRIVATE_DATA(cc); 1864 srcw[1] = srcw[0] + sizeof(sljit_sw); 1865 break; 1866 1867 default: 1868 SLJIT_ASSERT_STOP(); 1869 break; 1870 } 1871 cc += size; 1872 break; 1873 1874 default: 1875 cc = next_opcode(common, cc); 1876 SLJIT_ASSERT(cc != NULL); 1877 break; 1878 } 1879 break; 1880 1881 case end: 1882 SLJIT_ASSERT_STOP(); 1883 break; 1884 } 1885 1886 while (count > 0) 1887 { 1888 count--; 1889 if (save) 1890 { 1891 if (tmp1next) 1892 { 1893 if (!tmp1empty) 1894 { 1895 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); 1896 stackptr += sizeof(sljit_sw); 1897 } 1898 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]); 1899 tmp1empty = FALSE; 1900 tmp1next = FALSE; 1901 } 1902 else 1903 { 1904 if (!tmp2empty) 1905 { 1906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); 1907 stackptr += sizeof(sljit_sw); 1908 } 1909 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]); 1910 tmp2empty = FALSE; 1911 tmp1next = TRUE; 1912 } 1913 } 1914 else 1915 { 1916 if (tmp1next) 1917 { 1918 SLJIT_ASSERT(!tmp1empty); 1919 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0); 1920 tmp1empty = stackptr >= stacktop; 1921 if (!tmp1empty) 1922 { 1923 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr); 1924 stackptr += sizeof(sljit_sw); 1925 } 1926 tmp1next = FALSE; 1927 } 1928 else 1929 { 1930 SLJIT_ASSERT(!tmp2empty); 1931 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0); 1932 tmp2empty = stackptr >= stacktop; 1933 if (!tmp2empty) 1934 { 1935 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr); 1936 stackptr += sizeof(sljit_sw); 1937 } 1938 tmp1next = TRUE; 1939 } 1940 } 1941 } 1942 } 1943while (status != end); 1944 
1945if (save) 1946 { 1947 if (tmp1next) 1948 { 1949 if (!tmp1empty) 1950 { 1951 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); 1952 stackptr += sizeof(sljit_sw); 1953 } 1954 if (!tmp2empty) 1955 { 1956 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); 1957 stackptr += sizeof(sljit_sw); 1958 } 1959 } 1960 else 1961 { 1962 if (!tmp2empty) 1963 { 1964 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); 1965 stackptr += sizeof(sljit_sw); 1966 } 1967 if (!tmp1empty) 1968 { 1969 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); 1970 stackptr += sizeof(sljit_sw); 1971 } 1972 } 1973 } 1974SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty))); 1975} 1976 1977static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset) 1978{ 1979pcre_uchar *end = bracketend(cc); 1980BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT; 1981 1982/* Assert captures then. */ 1983if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) 1984 current_offset = NULL; 1985/* Conditional block does not. 
*/ 1986if (*cc == OP_COND || *cc == OP_SCOND) 1987 has_alternatives = FALSE; 1988 1989cc = next_opcode(common, cc); 1990if (has_alternatives) 1991 current_offset = common->then_offsets + (cc - common->start); 1992 1993while (cc < end) 1994 { 1995 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)) 1996 cc = set_then_offsets(common, cc, current_offset); 1997 else 1998 { 1999 if (*cc == OP_ALT && has_alternatives) 2000 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start); 2001 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL) 2002 *current_offset = 1; 2003 cc = next_opcode(common, cc); 2004 } 2005 } 2006 2007return end; 2008} 2009 2010#undef CASE_ITERATOR_PRIVATE_DATA_1 2011#undef CASE_ITERATOR_PRIVATE_DATA_2A 2012#undef CASE_ITERATOR_PRIVATE_DATA_2B 2013#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1 2014#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A 2015#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B 2016 2017static SLJIT_INLINE BOOL is_powerof2(unsigned int value) 2018{ 2019return (value & (value - 1)) == 0; 2020} 2021 2022static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label) 2023{ 2024while (list) 2025 { 2026 /* sljit_set_label is clever enough to do nothing 2027 if either the jump or the label is NULL. 
*/ 2028 SET_LABEL(list->jump, label); 2029 list = list->next; 2030 } 2031} 2032 2033static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump) 2034{ 2035jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list)); 2036if (list_item) 2037 { 2038 list_item->next = *list; 2039 list_item->jump = jump; 2040 *list = list_item; 2041 } 2042} 2043 2044static void add_stub(compiler_common *common, struct sljit_jump *start) 2045{ 2046DEFINE_COMPILER; 2047stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list)); 2048 2049if (list_item) 2050 { 2051 list_item->start = start; 2052 list_item->quit = LABEL(); 2053 list_item->next = common->stubs; 2054 common->stubs = list_item; 2055 } 2056} 2057 2058static void flush_stubs(compiler_common *common) 2059{ 2060DEFINE_COMPILER; 2061stub_list* list_item = common->stubs; 2062 2063while (list_item) 2064 { 2065 JUMPHERE(list_item->start); 2066 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL)); 2067 JUMPTO(SLJIT_JUMP, list_item->quit); 2068 list_item = list_item->next; 2069 } 2070common->stubs = NULL; 2071} 2072 2073static void add_label_addr(compiler_common *common, sljit_uw *update_addr) 2074{ 2075DEFINE_COMPILER; 2076label_addr_list *label_addr; 2077 2078label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list)); 2079if (label_addr == NULL) 2080 return; 2081label_addr->label = LABEL(); 2082label_addr->update_addr = update_addr; 2083label_addr->next = common->label_addrs; 2084common->label_addrs = label_addr; 2085} 2086 2087static SLJIT_INLINE void count_match(compiler_common *common) 2088{ 2089DEFINE_COMPILER; 2090 2091OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1); 2092add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO)); 2093} 2094 2095static SLJIT_INLINE void allocate_stack(compiler_common *common, int size) 2096{ 2097/* May destroy all locals and registers except TMP2. 
*/ 2098DEFINE_COMPILER; 2099 2100OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); 2101#ifdef DESTROY_REGISTERS 2102OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345); 2103OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); 2104OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); 2105OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0); 2106OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0); 2107#endif 2108add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0)); 2109} 2110 2111static SLJIT_INLINE void free_stack(compiler_common *common, int size) 2112{ 2113DEFINE_COMPILER; 2114OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); 2115} 2116 2117static SLJIT_INLINE void reset_ovector(compiler_common *common, int length) 2118{ 2119DEFINE_COMPILER; 2120struct sljit_label *loop; 2121int i; 2122 2123/* At this point we can freely use all temporary registers. */ 2124SLJIT_ASSERT(length > 1); 2125/* TMP1 returns with begin - 1. */ 2126OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1)); 2127if (length < 8) 2128 { 2129 for (i = 1; i < length; i++) 2130 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0); 2131 } 2132else 2133 { 2134 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START); 2135 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1); 2136 loop = LABEL(); 2137 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0); 2138 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1); 2139 JUMPTO(SLJIT_C_NOT_ZERO, loop); 2140 } 2141} 2142 2143static SLJIT_INLINE void do_reset_match(compiler_common *common, int length) 2144{ 2145DEFINE_COMPILER; 2146struct sljit_label *loop; 2147int i; 2148 2149SLJIT_ASSERT(length > 1); 2150/* OVECTOR(1) contains the "string begin - 1" constant. 
*/ 2151if (length > 2) 2152 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); 2153if (length < 8) 2154 { 2155 for (i = 2; i < length; i++) 2156 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0); 2157 } 2158else 2159 { 2160 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw)); 2161 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2); 2162 loop = LABEL(); 2163 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0); 2164 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1); 2165 JUMPTO(SLJIT_C_NOT_ZERO, loop); 2166 } 2167 2168OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0); 2169if (common->mark_ptr != 0) 2170 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0); 2171if (common->control_head_ptr != 0) 2172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); 2173OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack)); 2174OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); 2175OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base)); 2176} 2177 2178static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg) 2179{ 2180while (current != NULL) 2181 { 2182 switch (current[-2]) 2183 { 2184 case type_then_trap: 2185 break; 2186 2187 case type_mark: 2188 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0) 2189 return current[-4]; 2190 break; 2191 2192 default: 2193 SLJIT_ASSERT_STOP(); 2194 break; 2195 } 2196 current = (sljit_sw*)current[-1]; 2197 } 2198return -1; 2199} 2200 2201static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket) 2202{ 2203DEFINE_COMPILER; 2204struct sljit_label *loop; 2205struct sljit_jump *early_quit; 2206 2207/* At this point we can freely use all registers. 
*/ 2208OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); 2209OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0); 2210 2211OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); 2212if (common->mark_ptr != 0) 2213 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); 2214OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count)); 2215if (common->mark_ptr != 0) 2216 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0); 2217OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int)); 2218OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin)); 2219GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START); 2220/* Unlikely, but possible */ 2221early_quit = CMP(SLJIT_C_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0); 2222loop = LABEL(); 2223OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0); 2224OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw)); 2225/* Copy the integer value to the output buffer */ 2226#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 2227OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT); 2228#endif 2229OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0); 2230OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); 2231JUMPTO(SLJIT_C_NOT_ZERO, loop); 2232JUMPHERE(early_quit); 2233 2234/* Calculate the return value, which is the maximum ovector value. */ 2235if (topbracket > 1) 2236 { 2237 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw)); 2238 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1); 2239 2240 /* OVECTOR(0) is never equal to SLJIT_S2. 
*/ 2241 loop = LABEL(); 2242 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))); 2243 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); 2244 CMPTO(SLJIT_C_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop); 2245 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0); 2246 } 2247else 2248 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); 2249} 2250 2251static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit) 2252{ 2253DEFINE_COMPILER; 2254struct sljit_jump *jump; 2255 2256SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2); 2257SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0 2258 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0)); 2259 2260OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0); 2261OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL); 2262OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count)); 2263CMPTO(SLJIT_C_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit); 2264 2265/* Store match begin and end. */ 2266OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin)); 2267OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets)); 2268 2269jump = CMP(SLJIT_C_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3); 2270OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0); 2271#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 2272OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT); 2273#endif 2274OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0); 2275JUMPHERE(jump); 2276 2277OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? 
common->start_used_ptr : common->hit_start); 2278OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0); 2279#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 2280OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT); 2281#endif 2282OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0); 2283 2284OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0); 2285#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 2286OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT); 2287#endif 2288OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0); 2289 2290JUMPTO(SLJIT_JUMP, quit); 2291} 2292 2293static SLJIT_INLINE void check_start_used_ptr(compiler_common *common) 2294{ 2295/* May destroy TMP1. */ 2296DEFINE_COMPILER; 2297struct sljit_jump *jump; 2298 2299if (common->mode == JIT_PARTIAL_SOFT_COMPILE) 2300 { 2301 /* The value of -1 must be kept for start_used_ptr! */ 2302 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1); 2303 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting 2304 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */ 2305 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0); 2306 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); 2307 JUMPHERE(jump); 2308 } 2309else if (common->mode == JIT_PARTIAL_HARD_COMPILE) 2310 { 2311 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); 2312 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); 2313 JUMPHERE(jump); 2314 } 2315} 2316 2317static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc) 2318{ 2319/* Detects if the character has an othercase. 
*/ 2320unsigned int c; 2321 2322#ifdef SUPPORT_UTF 2323if (common->utf) 2324 { 2325 GETCHAR(c, cc); 2326 if (c > 127) 2327 { 2328#ifdef SUPPORT_UCP 2329 return c != UCD_OTHERCASE(c); 2330#else 2331 return FALSE; 2332#endif 2333 } 2334#ifndef COMPILE_PCRE8 2335 return common->fcc[c] != c; 2336#endif 2337 } 2338else 2339#endif 2340 c = *cc; 2341return MAX_255(c) ? common->fcc[c] != c : FALSE; 2342} 2343 2344static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c) 2345{ 2346/* Returns with the othercase. */ 2347#ifdef SUPPORT_UTF 2348if (common->utf && c > 127) 2349 { 2350#ifdef SUPPORT_UCP 2351 return UCD_OTHERCASE(c); 2352#else 2353 return c; 2354#endif 2355 } 2356#endif 2357return TABLE_GET(c, common->fcc, c); 2358} 2359 2360static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc) 2361{ 2362/* Detects if the character and its othercase has only 1 bit difference. */ 2363unsigned int c, oc, bit; 2364#if defined SUPPORT_UTF && defined COMPILE_PCRE8 2365int n; 2366#endif 2367 2368#ifdef SUPPORT_UTF 2369if (common->utf) 2370 { 2371 GETCHAR(c, cc); 2372 if (c <= 127) 2373 oc = common->fcc[c]; 2374 else 2375 { 2376#ifdef SUPPORT_UCP 2377 oc = UCD_OTHERCASE(c); 2378#else 2379 oc = c; 2380#endif 2381 } 2382 } 2383else 2384 { 2385 c = *cc; 2386 oc = TABLE_GET(c, common->fcc, c); 2387 } 2388#else 2389c = *cc; 2390oc = TABLE_GET(c, common->fcc, c); 2391#endif 2392 2393SLJIT_ASSERT(c != oc); 2394 2395bit = c ^ oc; 2396/* Optimized for English alphabet. */ 2397if (c <= 127 && bit == 0x20) 2398 return (0 << 8) | 0x20; 2399 2400/* Since c != oc, they must have at least 1 bit difference. 
*/ 2401if (!is_powerof2(bit)) 2402 return 0; 2403 2404#if defined COMPILE_PCRE8 2405 2406#ifdef SUPPORT_UTF 2407if (common->utf && c > 127) 2408 { 2409 n = GET_EXTRALEN(*cc); 2410 while ((bit & 0x3f) == 0) 2411 { 2412 n--; 2413 bit >>= 6; 2414 } 2415 return (n << 8) | bit; 2416 } 2417#endif /* SUPPORT_UTF */ 2418return (0 << 8) | bit; 2419 2420#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 2421 2422#ifdef SUPPORT_UTF 2423if (common->utf && c > 65535) 2424 { 2425 if (bit >= (1 << 10)) 2426 bit >>= 10; 2427 else 2428 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8)); 2429 } 2430#endif /* SUPPORT_UTF */ 2431return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8)); 2432 2433#endif /* COMPILE_PCRE[8|16|32] */ 2434} 2435 2436static void check_partial(compiler_common *common, BOOL force) 2437{ 2438/* Checks whether a partial matching is occurred. Does not modify registers. */ 2439DEFINE_COMPILER; 2440struct sljit_jump *jump = NULL; 2441 2442SLJIT_ASSERT(!force || common->mode != JIT_COMPILE); 2443 2444if (common->mode == JIT_COMPILE) 2445 return; 2446 2447if (!force) 2448 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); 2449else if (common->mode == JIT_PARTIAL_SOFT_COMPILE) 2450 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); 2451 2452if (common->mode == JIT_PARTIAL_SOFT_COMPILE) 2453 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); 2454else 2455 { 2456 if (common->partialmatchlabel != NULL) 2457 JUMPTO(SLJIT_JUMP, common->partialmatchlabel); 2458 else 2459 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); 2460 } 2461 2462if (jump != NULL) 2463 JUMPHERE(jump); 2464} 2465 2466static void check_str_end(compiler_common *common, jump_list **end_reached) 2467{ 2468/* Does not affect registers. Usually used in a tight spot. 
*/ 2469DEFINE_COMPILER; 2470struct sljit_jump *jump; 2471 2472if (common->mode == JIT_COMPILE) 2473 { 2474 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); 2475 return; 2476 } 2477 2478jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); 2479if (common->mode == JIT_PARTIAL_SOFT_COMPILE) 2480 { 2481 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); 2482 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); 2483 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP)); 2484 } 2485else 2486 { 2487 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); 2488 if (common->partialmatchlabel != NULL) 2489 JUMPTO(SLJIT_JUMP, common->partialmatchlabel); 2490 else 2491 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); 2492 } 2493JUMPHERE(jump); 2494} 2495 2496static void detect_partial_match(compiler_common *common, jump_list **backtracks) 2497{ 2498DEFINE_COMPILER; 2499struct sljit_jump *jump; 2500 2501if (common->mode == JIT_COMPILE) 2502 { 2503 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); 2504 return; 2505 } 2506 2507/* Partial matching mode. 
*/ 2508jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0); 2509add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); 2510if (common->mode == JIT_PARTIAL_SOFT_COMPILE) 2511 { 2512 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); 2513 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); 2514 } 2515else 2516 { 2517 if (common->partialmatchlabel != NULL) 2518 JUMPTO(SLJIT_JUMP, common->partialmatchlabel); 2519 else 2520 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP)); 2521 } 2522JUMPHERE(jump); 2523} 2524 2525static void peek_char(compiler_common *common, pcre_uint32 max) 2526{ 2527/* Reads the character into TMP1, keeps STR_PTR. 2528Does not check STR_END. TMP2 Destroyed. */ 2529DEFINE_COMPILER; 2530#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 2531struct sljit_jump *jump; 2532#endif 2533 2534SLJIT_UNUSED_ARG(max); 2535 2536OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); 2537#if defined SUPPORT_UTF && defined COMPILE_PCRE8 2538if (common->utf) 2539 { 2540 if (max < 128) return; 2541 2542 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); 2543 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2544 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); 2545 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 2546 JUMPHERE(jump); 2547 } 2548#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ 2549 2550#if defined SUPPORT_UTF && defined COMPILE_PCRE16 2551if (common->utf) 2552 { 2553 if (max < 0xd800) return; 2554 2555 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); 2556 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); 2557 /* TMP2 contains the high surrogate. 
*/ 2558 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2559 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40); 2560 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); 2561 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); 2562 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2563 JUMPHERE(jump); 2564 } 2565#endif 2566} 2567 2568#if defined SUPPORT_UTF && defined COMPILE_PCRE8 2569 2570static BOOL is_char7_bitset(const pcre_uint8 *bitset, BOOL nclass) 2571{ 2572/* Tells whether the character codes below 128 are enough 2573to determine a match. */ 2574const pcre_uint8 value = nclass ? 0xff : 0; 2575const pcre_uint8* end = bitset + 32; 2576 2577bitset += 16; 2578do 2579 { 2580 if (*bitset++ != value) 2581 return FALSE; 2582 } 2583while (bitset < end); 2584return TRUE; 2585} 2586 2587static void read_char7_type(compiler_common *common, BOOL full_read) 2588{ 2589/* Reads the precise character type of a character into TMP1, if the character 2590is less than 128. Otherwise it returns with zero. Does not check STR_END. The 2591full_read argument tells whether characters above max are accepted or not. */ 2592DEFINE_COMPILER; 2593struct sljit_jump *jump; 2594 2595SLJIT_ASSERT(common->utf); 2596 2597OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); 2598OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2599 2600OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); 2601 2602if (full_read) 2603 { 2604 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0); 2605 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); 2606 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 2607 JUMPHERE(jump); 2608 } 2609} 2610 2611#endif /* SUPPORT_UTF && COMPILE_PCRE8 */ 2612 2613static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr) 2614{ 2615/* Reads the precise value of a character into TMP1, if the character is 2616between min and max (c >= min && c <= max). 
Otherwise it returns with a value 2617outside the range. Does not check STR_END. */ 2618DEFINE_COMPILER; 2619#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 2620struct sljit_jump *jump; 2621#endif 2622#if defined SUPPORT_UTF && defined COMPILE_PCRE8 2623struct sljit_jump *jump2; 2624#endif 2625 2626SLJIT_UNUSED_ARG(update_str_ptr); 2627SLJIT_UNUSED_ARG(min); 2628SLJIT_UNUSED_ARG(max); 2629SLJIT_ASSERT(min <= max); 2630 2631OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2632OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2633 2634#if defined SUPPORT_UTF && defined COMPILE_PCRE8 2635if (common->utf) 2636 { 2637 if (max < 128 && !update_str_ptr) return; 2638 2639 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); 2640 if (min >= 0x10000) 2641 { 2642 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0); 2643 if (update_str_ptr) 2644 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); 2645 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2646 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x7); 2647 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); 2648 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); 2649 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2650 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 2651 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2652 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2653 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2654 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); 2655 if (!update_str_ptr) 2656 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); 2657 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2658 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2659 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2660 JUMPHERE(jump2); 2661 if (update_str_ptr) 2662 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); 2663 } 2664 else if (min >= 0x800 && max <= 0xffff) 2665 { 2666 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 
0xe0); 2667 if (update_str_ptr) 2668 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); 2669 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2670 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xf); 2671 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); 2672 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); 2673 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2674 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 2675 if (!update_str_ptr) 2676 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); 2677 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2678 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2679 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2680 JUMPHERE(jump2); 2681 if (update_str_ptr) 2682 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); 2683 } 2684 else if (max >= 0x800) 2685 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); 2686 else if (max < 128) 2687 { 2688 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); 2689 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 2690 } 2691 else 2692 { 2693 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2694 if (!update_str_ptr) 2695 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2696 else 2697 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); 2698 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); 2699 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2700 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2701 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2702 if (update_str_ptr) 2703 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0); 2704 } 2705 JUMPHERE(jump); 2706 } 2707#endif 2708 2709#if defined SUPPORT_UTF && defined COMPILE_PCRE16 2710if (common->utf) 2711 { 2712 if (max >= 0x10000) 2713 { 2714 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); 2715 jump = CMP(SLJIT_C_GREATER, 
TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); 2716 /* TMP2 contains the high surrogate. */ 2717 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2718 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40); 2719 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); 2720 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2721 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); 2722 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2723 JUMPHERE(jump); 2724 return; 2725 } 2726 2727 if (max < 0xd800 && !update_str_ptr) return; 2728 2729 /* Skip low surrogate if necessary. */ 2730 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); 2731 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); 2732 if (update_str_ptr) 2733 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2734 if (max >= 0xd800) 2735 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000); 2736 JUMPHERE(jump); 2737 } 2738#endif 2739} 2740 2741static SLJIT_INLINE void read_char(compiler_common *common) 2742{ 2743read_char_range(common, 0, READ_CHAR_MAX, TRUE); 2744} 2745 2746static void read_char8_type(compiler_common *common, BOOL update_str_ptr) 2747{ 2748/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */ 2749DEFINE_COMPILER; 2750#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 2751struct sljit_jump *jump; 2752#endif 2753#if defined SUPPORT_UTF && defined COMPILE_PCRE8 2754struct sljit_jump *jump2; 2755#endif 2756 2757SLJIT_UNUSED_ARG(update_str_ptr); 2758 2759OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); 2760OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2761 2762#if defined SUPPORT_UTF && defined COMPILE_PCRE8 2763if (common->utf) 2764 { 2765 /* This can be an extra read in some situations, but hopefully 2766 it is needed in most cases. 
*/ 2767 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); 2768 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0); 2769 if (!update_str_ptr) 2770 { 2771 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2772 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2773 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2774 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); 2775 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); 2776 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); 2777 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); 2778 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); 2779 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); 2780 JUMPHERE(jump2); 2781 } 2782 else 2783 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL)); 2784 JUMPHERE(jump); 2785 return; 2786 } 2787#endif /* SUPPORT_UTF && COMPILE_PCRE8 */ 2788 2789#if !defined COMPILE_PCRE8 2790/* The ctypes array contains only 256 values. */ 2791OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); 2792jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); 2793#endif 2794OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); 2795#if !defined COMPILE_PCRE8 2796JUMPHERE(jump); 2797#endif 2798 2799#if defined SUPPORT_UTF && defined COMPILE_PCRE16 2800if (common->utf && update_str_ptr) 2801 { 2802 /* Skip low surrogate if necessary. */ 2803 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800); 2804 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1); 2805 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2806 JUMPHERE(jump); 2807 } 2808#endif /* SUPPORT_UTF && COMPILE_PCRE16 */ 2809} 2810 2811static void skip_char_back(compiler_common *common) 2812{ 2813/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. 
*/ 2814DEFINE_COMPILER; 2815#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 2816#if defined COMPILE_PCRE8 2817struct sljit_label *label; 2818 2819if (common->utf) 2820 { 2821 label = LABEL(); 2822 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); 2823 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2824 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); 2825 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label); 2826 return; 2827 } 2828#elif defined COMPILE_PCRE16 2829if (common->utf) 2830 { 2831 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); 2832 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2833 /* Skip low surrogate if necessary. */ 2834 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); 2835 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00); 2836 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); 2837 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); 2838 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); 2839 return; 2840 } 2841#endif /* COMPILE_PCRE[8|16] */ 2842#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ 2843OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2844} 2845 2846static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch) 2847{ 2848/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */ 2849DEFINE_COMPILER; 2850struct sljit_jump *jump; 2851 2852if (nltype == NLTYPE_ANY) 2853 { 2854 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); 2855 add_jump(compiler, backtracks, JUMP(jumpifmatch ? 
SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); 2856 } 2857else if (nltype == NLTYPE_ANYCRLF) 2858 { 2859 if (jumpifmatch) 2860 { 2861 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR)); 2862 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); 2863 } 2864 else 2865 { 2866 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); 2867 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); 2868 JUMPHERE(jump); 2869 } 2870 } 2871else 2872 { 2873 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256); 2874 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); 2875 } 2876} 2877 2878#ifdef SUPPORT_UTF 2879 2880#if defined COMPILE_PCRE8 2881static void do_utfreadchar(compiler_common *common) 2882{ 2883/* Fast decoding a UTF-8 character. TMP1 contains the first byte 2884of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */ 2885DEFINE_COMPILER; 2886struct sljit_jump *jump; 2887 2888sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 2889OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2890OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); 2891OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2892OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2893OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2894 2895/* Searching for the first zero. */ 2896OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); 2897jump = JUMP(SLJIT_C_NOT_ZERO); 2898/* Two byte sequence. 
*/ 2899OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2900OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2)); 2901sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 2902 2903JUMPHERE(jump); 2904OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 2905OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800); 2906OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2907OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2908OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2909 2910OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000); 2911jump = JUMP(SLJIT_C_NOT_ZERO); 2912/* Three byte sequence. */ 2913OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); 2914OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3)); 2915sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 2916 2917/* Four byte sequence. */ 2918JUMPHERE(jump); 2919OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); 2920OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); 2921OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2922OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); 2923OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2924OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2925OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4)); 2926sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 2927} 2928 2929static void do_utfreadchar16(compiler_common *common) 2930{ 2931/* Fast decoding a UTF-8 character. TMP1 contains the first byte 2932of the character (>= 0xc0). Return value in TMP1. */ 2933DEFINE_COMPILER; 2934struct sljit_jump *jump; 2935 2936sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 2937OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2938OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); 2939OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2940OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2941OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2942 2943/* Searching for the first zero. 
*/ 2944OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); 2945jump = JUMP(SLJIT_C_NOT_ZERO); 2946/* Two byte sequence. */ 2947OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2948sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 2949 2950JUMPHERE(jump); 2951OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400); 2952OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO); 2953/* This code runs only in 8 bit mode. No need to shift the value. */ 2954OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 2955OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 2956OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800); 2957OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); 2958OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); 2959OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); 2960/* Three byte sequence. */ 2961OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); 2962sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 2963} 2964 2965static void do_utfreadtype8(compiler_common *common) 2966{ 2967/* Fast decoding a UTF-8 character type. TMP2 contains the first byte 2968of the character (>= 0xc0). Return value in TMP1. */ 2969DEFINE_COMPILER; 2970struct sljit_jump *jump; 2971struct sljit_jump *compare; 2972 2973sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 2974 2975OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20); 2976jump = JUMP(SLJIT_C_NOT_ZERO); 2977/* Two byte sequence. */ 2978OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 2979OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 2980OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f); 2981/* The upper 5 bits are known at this point. 
*/ 2982compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3); 2983OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); 2984OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); 2985OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); 2986OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); 2987sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 2988 2989JUMPHERE(compare); 2990OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); 2991sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 2992 2993/* We only have types for characters less than 256. */ 2994JUMPHERE(jump); 2995OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); 2996OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); 2997OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 2998sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 2999} 3000 3001#endif /* COMPILE_PCRE8 */ 3002 3003#endif /* SUPPORT_UTF */ 3004 3005#ifdef SUPPORT_UCP 3006 3007/* UCD_BLOCK_SIZE must be 128 (see the assert below). */ 3008#define UCD_BLOCK_MASK 127 3009#define UCD_BLOCK_SHIFT 7 3010 3011static void do_getucd(compiler_common *common) 3012{ 3013/* Search the UCD record for the character comes in TMP1. 3014Returns chartype in TMP1 and UCD offset in TMP2. 
*/ 3015DEFINE_COMPILER; 3016 3017SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8); 3018 3019sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 3020OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); 3021OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); 3022OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); 3023OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); 3024OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); 3025OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); 3026OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); 3027OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); 3028OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); 3029sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 3030} 3031#endif 3032 3033static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline) 3034{ 3035DEFINE_COMPILER; 3036struct sljit_label *mainloop; 3037struct sljit_label *newlinelabel = NULL; 3038struct sljit_jump *start; 3039struct sljit_jump *end = NULL; 3040struct sljit_jump *nl = NULL; 3041#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 3042struct sljit_jump *singlechar; 3043#endif 3044jump_list *newline = NULL; 3045BOOL newlinecheck = FALSE; 3046BOOL readuchar = FALSE; 3047 3048if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY || 3049 common->nltype == NLTYPE_ANYCRLF || common->newline > 255)) 3050 newlinecheck = TRUE; 3051 3052if (firstline) 3053 { 3054 /* Search for the end of the first line. 
*/ 3055 SLJIT_ASSERT(common->first_line_end != 0); 3056 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); 3057 3058 if (common->nltype == NLTYPE_FIXED && common->newline > 255) 3059 { 3060 mainloop = LABEL(); 3061 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 3062 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); 3063 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); 3064 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); 3065 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop); 3066 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop); 3067 JUMPHERE(end); 3068 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 3069 } 3070 else 3071 { 3072 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); 3073 mainloop = LABEL(); 3074 /* Continual stores does not cause data dependency. */ 3075 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0); 3076 read_char_range(common, common->nlmin, common->nlmax, TRUE); 3077 check_newlinechar(common, common->nltype, &newline, TRUE); 3078 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop); 3079 JUMPHERE(end); 3080 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0); 3081 set_jumps(newline, LABEL()); 3082 } 3083 3084 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); 3085 } 3086 3087start = JUMP(SLJIT_JUMP); 3088 3089if (newlinecheck) 3090 { 3091 newlinelabel = LABEL(); 3092 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 3093 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); 3094 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); 3095 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff); 3096 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); 3097#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32 3098 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); 3099#endif 3100 OP2(SLJIT_ADD, 
STR_PTR, 0, STR_PTR, 0, TMP1, 0); 3101 nl = JUMP(SLJIT_JUMP); 3102 } 3103 3104mainloop = LABEL(); 3105 3106/* Increasing the STR_PTR here requires one less jump in the most common case. */ 3107#ifdef SUPPORT_UTF 3108if (common->utf) readuchar = TRUE; 3109#endif 3110if (newlinecheck) readuchar = TRUE; 3111 3112if (readuchar) 3113 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); 3114 3115if (newlinecheck) 3116 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel); 3117 3118OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 3119#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 3120#if defined COMPILE_PCRE8 3121if (common->utf) 3122 { 3123 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); 3124 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); 3125 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); 3126 JUMPHERE(singlechar); 3127 } 3128#elif defined COMPILE_PCRE16 3129if (common->utf) 3130 { 3131 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); 3132 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); 3133 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); 3134 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); 3135 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); 3136 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); 3137 JUMPHERE(singlechar); 3138 } 3139#endif /* COMPILE_PCRE[8|16] */ 3140#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */ 3141JUMPHERE(start); 3142 3143if (newlinecheck) 3144 { 3145 JUMPHERE(end); 3146 JUMPHERE(nl); 3147 } 3148 3149return mainloop; 3150} 3151 3152#define MAX_N_CHARS 16 3153#define MAX_N_BYTES 8 3154 3155static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes) 3156{ 3157pcre_uint8 len = bytes[0]; 3158int i; 3159 3160if (len == 255) 3161 return; 3162 3163if (len == 0) 3164 { 3165 bytes[0] = 1; 3166 bytes[1] = byte; 3167 return; 3168 } 3169 3170for (i = len; i > 0; i--) 3171 if (bytes[i] == byte) 
3172 return; 3173 3174if (len >= MAX_N_BYTES - 1) 3175 { 3176 bytes[0] = 255; 3177 return; 3178 } 3179 3180len++; 3181bytes[len] = byte; 3182bytes[0] = len; 3183} 3184 3185static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars) 3186{ 3187/* Recursive function, which scans prefix literals. */ 3188BOOL last, any, caseless; 3189int len, repeat, len_save, consumed = 0; 3190pcre_uint32 chr, mask; 3191pcre_uchar *alternative, *cc_save, *oc; 3192#if defined SUPPORT_UTF && defined COMPILE_PCRE8 3193pcre_uchar othercase[8]; 3194#elif defined SUPPORT_UTF && defined COMPILE_PCRE16 3195pcre_uchar othercase[2]; 3196#else 3197pcre_uchar othercase[1]; 3198#endif 3199 3200repeat = 1; 3201while (TRUE) 3202 { 3203 last = TRUE; 3204 any = FALSE; 3205 caseless = FALSE; 3206 switch (*cc) 3207 { 3208 case OP_CHARI: 3209 caseless = TRUE; 3210 case OP_CHAR: 3211 last = FALSE; 3212 cc++; 3213 break; 3214 3215 case OP_SOD: 3216 case OP_SOM: 3217 case OP_SET_SOM: 3218 case OP_NOT_WORD_BOUNDARY: 3219 case OP_WORD_BOUNDARY: 3220 case OP_EODN: 3221 case OP_EOD: 3222 case OP_CIRC: 3223 case OP_CIRCM: 3224 case OP_DOLL: 3225 case OP_DOLLM: 3226 /* Zero width assertions. 
*/ 3227 cc++; 3228 continue; 3229 3230 case OP_ASSERT: 3231 case OP_ASSERT_NOT: 3232 case OP_ASSERTBACK: 3233 case OP_ASSERTBACK_NOT: 3234 cc = bracketend(cc); 3235 continue; 3236 3237 case OP_PLUSI: 3238 case OP_MINPLUSI: 3239 case OP_POSPLUSI: 3240 caseless = TRUE; 3241 case OP_PLUS: 3242 case OP_MINPLUS: 3243 case OP_POSPLUS: 3244 cc++; 3245 break; 3246 3247 case OP_EXACTI: 3248 caseless = TRUE; 3249 case OP_EXACT: 3250 repeat = GET2(cc, 1); 3251 last = FALSE; 3252 cc += 1 + IMM2_SIZE; 3253 break; 3254 3255 case OP_QUERYI: 3256 case OP_MINQUERYI: 3257 case OP_POSQUERYI: 3258 caseless = TRUE; 3259 case OP_QUERY: 3260 case OP_MINQUERY: 3261 case OP_POSQUERY: 3262 len = 1; 3263 cc++; 3264#ifdef SUPPORT_UTF 3265 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc); 3266#endif 3267 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars); 3268 if (max_chars == 0) 3269 return consumed; 3270 last = FALSE; 3271 break; 3272 3273 case OP_KET: 3274 cc += 1 + LINK_SIZE; 3275 continue; 3276 3277 case OP_ALT: 3278 cc += GET(cc, 1); 3279 continue; 3280 3281 case OP_ONCE: 3282 case OP_ONCE_NC: 3283 case OP_BRA: 3284 case OP_BRAPOS: 3285 case OP_CBRA: 3286 case OP_CBRAPOS: 3287 alternative = cc + GET(cc, 1); 3288 while (*alternative == OP_ALT) 3289 { 3290 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars); 3291 if (max_chars == 0) 3292 return consumed; 3293 alternative += GET(alternative, 1); 3294 } 3295 3296 if (*cc == OP_CBRA || *cc == OP_CBRAPOS) 3297 cc += IMM2_SIZE; 3298 cc += 1 + LINK_SIZE; 3299 continue; 3300 3301 case OP_CLASS: 3302#if defined SUPPORT_UTF && defined COMPILE_PCRE8 3303 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed; 3304#endif 3305 any = TRUE; 3306 cc += 1 + 32 / sizeof(pcre_uchar); 3307 break; 3308 3309 case OP_NCLASS: 3310#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 3311 if (common->utf) return consumed; 3312#endif 3313 any = TRUE; 3314 cc += 1 + 32 / 
sizeof(pcre_uchar); 3315 break; 3316 3317#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 3318 case OP_XCLASS: 3319#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 3320 if (common->utf) return consumed; 3321#endif 3322 any = TRUE; 3323 cc += GET(cc, 1); 3324 break; 3325#endif 3326 3327 case OP_DIGIT: 3328#if defined SUPPORT_UTF && defined COMPILE_PCRE8 3329 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE)) 3330 return consumed; 3331#endif 3332 any = TRUE; 3333 cc++; 3334 break; 3335 3336 case OP_WHITESPACE: 3337#if defined SUPPORT_UTF && defined COMPILE_PCRE8 3338 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE)) 3339 return consumed; 3340#endif 3341 any = TRUE; 3342 cc++; 3343 break; 3344 3345 case OP_WORDCHAR: 3346#if defined SUPPORT_UTF && defined COMPILE_PCRE8 3347 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE)) 3348 return consumed; 3349#endif 3350 any = TRUE; 3351 cc++; 3352 break; 3353 3354 case OP_NOT: 3355 case OP_NOTI: 3356 cc++; 3357 /* Fall through. 
*/ 3358 case OP_NOT_DIGIT: 3359 case OP_NOT_WHITESPACE: 3360 case OP_NOT_WORDCHAR: 3361 case OP_ANY: 3362 case OP_ALLANY: 3363#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 3364 if (common->utf) return consumed; 3365#endif 3366 any = TRUE; 3367 cc++; 3368 break; 3369 3370#ifdef SUPPORT_UCP 3371 case OP_NOTPROP: 3372 case OP_PROP: 3373#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 3374 if (common->utf) return consumed; 3375#endif 3376 any = TRUE; 3377 cc += 1 + 2; 3378 break; 3379#endif 3380 3381 case OP_TYPEEXACT: 3382 repeat = GET2(cc, 1); 3383 cc += 1 + IMM2_SIZE; 3384 continue; 3385 3386 case OP_NOTEXACT: 3387 case OP_NOTEXACTI: 3388#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 3389 if (common->utf) return consumed; 3390#endif 3391 any = TRUE; 3392 repeat = GET2(cc, 1); 3393 cc += 1 + IMM2_SIZE + 1; 3394 break; 3395 3396 default: 3397 return consumed; 3398 } 3399 3400 if (any) 3401 { 3402#if defined COMPILE_PCRE8 3403 mask = 0xff; 3404#elif defined COMPILE_PCRE16 3405 mask = 0xffff; 3406#elif defined COMPILE_PCRE32 3407 mask = 0xffffffff; 3408#else 3409 SLJIT_ASSERT_STOP(); 3410#endif 3411 3412 do 3413 { 3414 chars[0] = mask; 3415 chars[1] = mask; 3416 bytes[0] = 255; 3417 3418 consumed++; 3419 if (--max_chars == 0) 3420 return consumed; 3421 chars += 2; 3422 bytes += MAX_N_BYTES; 3423 } 3424 while (--repeat > 0); 3425 3426 repeat = 1; 3427 continue; 3428 } 3429 3430 len = 1; 3431#ifdef SUPPORT_UTF 3432 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc); 3433#endif 3434 3435 if (caseless && char_has_othercase(common, cc)) 3436 { 3437#ifdef SUPPORT_UTF 3438 if (common->utf) 3439 { 3440 GETCHAR(chr, cc); 3441 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len) 3442 return consumed; 3443 } 3444 else 3445#endif 3446 { 3447 chr = *cc; 3448 othercase[0] = TABLE_GET(chr, common->fcc, chr); 3449 } 3450 } 3451 else 3452 caseless = FALSE; 3453 3454 len_save = len; 3455 cc_save = cc; 3456 while (TRUE) 3457 { 3458 oc = othercase; 
3459 do 3460 { 3461 chr = *cc; 3462#ifdef COMPILE_PCRE32 3463 if (SLJIT_UNLIKELY(chr == NOTACHAR)) 3464 return consumed; 3465#endif 3466 add_prefix_byte((pcre_uint8)chr, bytes); 3467 3468 mask = 0; 3469 if (caseless) 3470 { 3471 add_prefix_byte((pcre_uint8)*oc, bytes); 3472 mask = *cc ^ *oc; 3473 chr |= mask; 3474 } 3475 3476#ifdef COMPILE_PCRE32 3477 if (chars[0] == NOTACHAR && chars[1] == 0) 3478#else 3479 if (chars[0] == NOTACHAR) 3480#endif 3481 { 3482 chars[0] = chr; 3483 chars[1] = mask; 3484 } 3485 else 3486 { 3487 mask |= chars[0] ^ chr; 3488 chr |= mask; 3489 chars[0] = chr; 3490 chars[1] |= mask; 3491 } 3492 3493 len--; 3494 consumed++; 3495 if (--max_chars == 0) 3496 return consumed; 3497 chars += 2; 3498 bytes += MAX_N_BYTES; 3499 cc++; 3500 oc++; 3501 } 3502 while (len > 0); 3503 3504 if (--repeat == 0) 3505 break; 3506 3507 len = len_save; 3508 cc = cc_save; 3509 } 3510 3511 repeat = 1; 3512 if (last) 3513 return consumed; 3514 } 3515}

/* Emits a start-position search loop built from the first characters of the
pattern, as collected by scan_prefix().

For every prefix position scan_prefix() fills a (value, mask) pair in chars[]
and a byte set in bytes[]: element 0 of a set holds the number of entries and
255 marks a position where any byte may appear. Two mechanisms are derived
from this data:

  - a 256 entry shift table, stored at the start of common->read_only_data,
    covering the longest run of more than two consecutive positions with a
    known byte set whose right-most set has at most four entries;
  - up to three fixed-offset character tests, picked among the positions
    whose mask has at most two bits set.

While the loop runs STR_END is lowered by the prefix length minus one (and
limited to first_line_end when firstline is set); it is restored afterwards.

Returns FALSE when nothing has been emitted (the prefix is too short, or the
case is left to fast_forward_first_char()). Returns TRUE otherwise. TRUE is
also returned when the shift table cannot be allocated: in that case
common->read_only_data is NULL and no loop has been emitted (presumably the
caller detects the NULL pointer - confirm against the call site). */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
{
DEFINE_COMPILER;
struct sljit_label *retry;
struct sljit_jump *stop;
pcre_uint32 chars[MAX_N_CHARS * 2];
pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
pcre_uint8 bit_count[MAX_N_CHARS];
pcre_uint32 mask;
pcre_uint8 *set_ptr, *set_last;
sljit_ub *shift_table = NULL;
int first_off = -1, last_off = -1, mid_off = -1;
int run_right = -1, run_len = 3 - 1;
int run_start = 0;
int pos, count;
BOOL in_run = FALSE;

/* Holds even when both pointers are NULL. */
SLJIT_ASSERT(common->read_only_data_ptr == common->read_only_data);

/* Mark every position as "nothing recorded yet". */
for (pos = 0; pos < MAX_N_CHARS; pos++)
  {
  chars[pos << 1] = NOTACHAR;
  chars[(pos << 1) + 1] = 0;
  bytes[pos * MAX_N_BYTES] = 0;
  }

count = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS);

if (count <= 1)
  return FALSE;

/* Number of set bits in each mask, computed four bits at a time. */
for (pos = 0; pos < count; pos++)
  {
  pcre_uint8 n = 0;

  mask = chars[(pos << 1) + 1];
  do
    {
    n += ones_in_half_byte[mask & 0xf];
    mask >>= 4;
    }
  while (mask != 0);
  bit_count[pos] = n;
  }

/* Look for the best run of positions with a known byte set. The loop goes
one step past the last position so that a run reaching the end of the
prefix is evaluated as well. */
for (pos = 0; pos <= count; pos++)
  {
  if (in_run && (pos - run_start) > run_len && (bytes[(pos - 1) * MAX_N_BYTES] <= 4))
    {
    run_len = pos - run_start;
    run_right = pos - 1;
    }

  if (pos < count && bytes[pos * MAX_N_BYTES] < 255)
    {
    if (!in_run)
      {
      in_run = TRUE;
      run_start = pos;
      }
    }
  else
    in_run = FALSE;
  }

if (run_right >= 0)
  {
  /* No read only data has been handed out yet (see the assert at the top
  of this function), so the buffer can simply be replaced by a bigger one
  whose first 256 bytes form the shift table. */
  if (common->read_only_data)
    SLJIT_FREE(common->read_only_data);

  common->read_only_data_size += 256;
  common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size);
  if (common->read_only_data == NULL)
    return TRUE;

  shift_table = (sljit_ub *)common->read_only_data;
  common->read_only_data_ptr = (sljit_uw *)(shift_table + 256);
  memset(shift_table, IN_UCHARS(run_len), 256);

  /* For each byte keep the smallest distance from the right end of the
  run at which that byte may occur. */
  for (pos = 0; pos < run_len; pos++)
    {
    set_ptr = bytes + ((run_right - pos) * MAX_N_BYTES);
    SLJIT_ASSERT(set_ptr[0] > 0 && set_ptr[0] < 255);
    set_last = set_ptr + set_ptr[0];
    for (set_ptr++; set_ptr <= set_last; set_ptr++)
      if (shift_table[*set_ptr] > IN_UCHARS(pos))
        shift_table[*set_ptr] = IN_UCHARS(pos);
    }
  }

/* First usable position, searching from the left. */
for (pos = 0; pos < count; pos++)
  if (bit_count[pos] <= 2)
    {
    first_off = pos;
    break;
    }

if (first_off < 0 && run_right < 0)
  return FALSE;

if (first_off >= 0)
  {
  /* Last usable position, searching from the right. The position that is
  already probed through the shift table is not selected. */
  for (pos = count - 1; pos > first_off; pos--)
    if (bit_count[pos] <= 2 && pos != run_right)
      {
      last_off = pos;
      break;
      }

  /* This case is handled better by fast_forward_first_char. */
  if (last_off == -1 && first_off == 0 && run_right < 0)
    return FALSE;

  /* A middle position is only searched when there is no shift table. */
  if (last_off >= 0 && run_right == -1)
    {
    /* Upper half first, then the lower half. */
    for (pos = (first_off + last_off) / 2 + 1; pos < last_off; pos++)
      if (bit_count[pos] <= 2)
        {
        mid_off = pos;
        break;
        }

    if (mid_off == -1)
      for (pos = (first_off + last_off) / 2; pos > first_off; pos--)
        if (bit_count[pos] <= 2)
          {
          mid_off = pos;
          break;
          }
    }

  SLJIT_ASSERT(last_off == -1 || (first_off < last_off));
  SLJIT_ASSERT(mid_off == -1 || (first_off < mid_off && last_off > mid_off));

  /* Move the selected (value, mask) pairs to the front of chars[]:
  [0,1] first position, [2,3] middle position, [4,5] last position. */
  chars[0] = chars[first_off << 1];
  chars[1] = chars[(first_off << 1) + 1];
  if (mid_off >= 0)
    {
    chars[2] = chars[mid_off << 1];
    chars[3] = chars[(mid_off << 1) + 1];
    }
  if (last_off >= 0)
    {
    chars[4] = chars[last_off << 1];
    chars[5] = chars[(last_off << 1) + 1];
    }
  }

/* From here on count is the distance between the first and the last prefix
position. */
count -= 1;
if (firstline)
  {
  SLJIT_ASSERT(common->first_line_end != 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(count));
  stop = CMP(SLJIT_C_LESS_EQUAL, STR_END, 0, TMP1, 0);
  OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
  JUMPHERE(stop);
  }
else
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(count));

#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* Keep the table address in a register, except on x86-32 where it is used
as an immediate displacement instead. */
if (run_right >= 0)
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)shift_table);
#endif

retry = LABEL();
stop = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

SLJIT_ASSERT(run_right >= 0 || first_off >= 0);

if (run_right >= 0)
  {
  /* Load one byte of the code unit at the right end of the run. The byte
  sets were built from the low eight bits of each character, hence the
  different address on big endian 16/32 bit builds. */
#if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(run_right));
#else
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(run_right + 1) - 1);
#endif

#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
#else
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)shift_table);
#endif
  /* Advance by the table value and try again unless it was zero. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, retry);
  }

if (first_off >= 0)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(first_off));
  if (last_off >= 0)
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(last_off));
  /* STR_PTR is advanced before the comparisons, so every failing test can
  jump straight back to the loop head. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[1] != 0)
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], retry);
  /* STR_PTR has already moved, hence the "- 1". */
  if (mid_off >= 0)
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(mid_off - 1));

  if (last_off >= 0)
    {
    if (chars[5] != 0)
      OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], retry);
    }

  if (mid_off >= 0)
    {
    if (chars[3] != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], retry);
    }
  /* Every test passed: undo the early advance. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }

JUMPHERE(stop);

if (firstline)
  {
  if (run_right >= 0)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  if (run_right >= 0)
    {
    /* The table driven skip may have stepped over the end of the first
    line; pull STR_PTR back in that case. */
    stop = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
    JUMPHERE(stop);
    }
  }
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(count));
return TRUE;
}

#undef MAX_N_CHARS
#undef MAX_N_BYTES

/* Emits a loop that advances STR_PTR to the next occurrence of first_char,
or to STR_END when there is none. When caseless is set the other case of
first_char is accepted too. When firstline is set the search is limited to
first_line_end; the real STR_END is parked in TMP3 meanwhile. */
static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
{
DEFINE_COMPILER;
struct sljit_label *retry;
struct sljit_jump *at_end;
struct sljit_jump *hit;
pcre_uchar other = first_char;
pcre_uchar diff;

/* Work out the other case up front; no code is generated by this. */
if (caseless)
  {
  other = TABLE_GET(first_char, common->fcc, first_char);
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
  if (first_char > 127 && common->utf)
    other = UCD_OTHERCASE(first_char);
#endif
  }

if (firstline)
  {
  SLJIT_ASSERT(common->first_line_end != 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  }

retry = LABEL();
at_end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (first_char == other)
  hit = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
else
  {
  diff = first_char ^ other;
  if (is_powerof2(diff))
    {
    /* The two cases differ in a single bit: force that bit on and do one
    comparison. */
    OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, diff);
    hit = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | diff);
    }
  else
    {
    /* Otherwise compare against both characters and combine the results. */
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other);
    OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
    hit = JUMP(SLJIT_C_NOT_ZERO);
    }
  }

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_JUMP, retry);
JUMPHERE(hit);
JUMPHERE(at_end);

if (firstline)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}

/* Emits code that advances STR_PTR to the position following the next
newline sequence. Nothing is skipped when STR_PTR is not after the "str"
field of the JIT arguments. The loop entry label of the generic variant is
published in common->ff_newline_shortcut. When firstline is set the search
is limited to first_line_end; the real STR_END is parked in TMP3 meanwhile. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
{
DEFINE_COMPILER;
struct sljit_label *scan;
struct sljit_jump *at_end;
struct sljit_jump *at_start;
struct sljit_jump *done;
struct sljit_jump *saw_cr = NULL;
struct sljit_jump *no_lf;
jump_list *keep_scanning = NULL;
const BOOL cr_may_pair = common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF;

if (firstline)
  {
  SLJIT_ASSERT(common->first_line_end != 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  }

if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
  /* Fixed newline made of two code units (high byte of common->newline
  first, low byte second). */
  at_end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  at_start = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

  /* Step back by one code unit when STR_PTR is at least two code units
  after "begin": TMP2 becomes 0 or 1, scaled to the code unit size. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

  /* Advance until the two code units just before STR_PTR form the
  newline, or the end is reached. */
  scan = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  done = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, scan);
  CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, scan);

  JUMPHERE(done);
  JUMPHERE(at_start);
  JUMPHERE(at_end);
  }
else
  {
  /* Generic variant: go back one character, then read characters until
  check_newlinechar() stops sending us back to the loop head. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  at_start = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
  skip_char_back(common);

  scan = LABEL();
  common->ff_newline_shortcut = scan;

  read_char_range(common, common->nlmin, common->nlmax, TRUE);
  at_end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (cr_may_pair)
    saw_cr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  check_newlinechar(common, common->nltype, &keep_scanning, FALSE);
  set_jumps(keep_scanning, scan);

  if (cr_may_pair)
    {
    done = JUMP(SLJIT_JUMP);
    JUMPHERE(saw_cr);
    /* After a CR also step over an immediately following NL: TMP1 becomes
    0 or 1, scaled to the code unit size, and is added to STR_PTR. */
    no_lf = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    JUMPHERE(no_lf);
    JUMPHERE(done);
    }
  JUMPHERE(at_end);
  JUMPHERE(at_start);
  }

if (firstline)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}

static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);

/* Emits a loop that advances STR_PTR until the current code unit is marked
in the start_bits bitmap, or STR_END is reached. The test is generated by
check_class_ranges() when the bitmap is simple enough, otherwise by a direct
bitmap lookup (code units of 255 and above share bit 255 on 16/32 bit
builds). In UTF mode a failing position is skipped together with the rest of
its character. When firstline is set the search is limited to
first_line_end; the real STR_END is parked in RETURN_ADDR meanwhile. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
{
DEFINE_COMPILER;
struct sljit_label *retry;
struct sljit_jump *at_end;
struct sljit_jump *bit_set = NULL;
jump_list *in_ranges = NULL;
#ifndef COMPILE_PCRE8
struct sljit_jump *small;
#endif
/* The top bit of the map is passed as the "nclass" flag. */
const BOOL top_bit = (start_bits[31] & 0x80) != 0;

if (firstline)
  {
  SLJIT_ASSERT(common->first_line_end != 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
  }

retry = LABEL();
at_end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#ifdef SUPPORT_UTF
/* The tests below overwrite TMP1; keep a copy for the UTF skip. */
if (common->utf)
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
#endif

if (!check_class_ranges(common, start_bits, top_bit, TRUE, &in_ranges))
  {
#ifndef COMPILE_PCRE8
  small = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
  JUMPHERE(small);
#endif
  /* TMP2 = 1 << (c & 7), TMP1 = start_bits[c >> 3]. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  bit_set = JUMP(SLJIT_C_NOT_ZERO);
  }

#ifdef SUPPORT_UTF
if (common->utf)
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
#endif
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* Lead bytes from 0xc0: add the value found in utf8_table4. */
  CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, retry);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  /* (c & 0xfc00) == 0xd800: skip one more code unit (two bytes). */
  CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, retry);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE[8|16] */
JUMPTO(SLJIT_JUMP, retry);
if (bit_set != NULL)
  JUMPHERE(bit_set);
if (in_ranges != NULL)
  set_jumps(in_ranges, LABEL());
JUMPHERE(at_end);

if (firstline)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}

/* Emits a forward search for req_char (or its other case when caseless is
set). The position where the character was found last time is cached in the
stack slot common->req_char_ptr; the search is skipped when STR_PTR is still
below that position, or when STR_PTR + REQ_BYTE_MAX is below STR_END. The
search starts one code unit after STR_PTR when has_firstchar is set.

Returns the jump that is taken when the end of the subject is reached
without finding the character; the caller has to bind it. */
static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
{
DEFINE_COMPILER;
struct sljit_label *scan;
struct sljit_jump *too_far;
struct sljit_jump *cached;
struct sljit_jump *hit;
struct sljit_jump *hit_other = NULL;
struct sljit_jump *missing;
pcre_uint32 other = req_char;
pcre_uint32 diff;

/* Work out the other case up front; no code is generated by this. */
if (caseless)
  {
  other = TABLE_GET(req_char, common->fcc, req_char);
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
  if (req_char > 127 && common->utf)
    other = UCD_OTHERCASE(req_char);
#endif
  }

SLJIT_ASSERT(common->req_char_ptr != 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
too_far = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
cached = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);

/* TMP1 is the scan pointer. */
if (has_firstchar)
  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
else
  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);

scan = LABEL();
missing = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
if (req_char == other)
  hit = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
else
  {
  diff = req_char ^ other;
  if (is_powerof2(diff))
    {
    /* Single differing bit: force it on and compare once. */
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, diff);
    hit = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | diff);
    }
  else
    {
    hit = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
    hit_other = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, other);
    }
  }
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_JUMP, scan);

JUMPHERE(hit);
if (hit_other)
  JUMPHERE(hit_other);
/* Remember where the character was found. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
JUMPHERE(cached);
JUMPHERE(too_far);
return missing;
}

/* Emits the fast-call routine that replays the records stored from
STACK_TOP upwards. Each record starts with a signed word:

  > 0  offset from the local base; the next two words are copied to that
       location and the one after it (three words consumed);
  = 0  end marker, the routine returns;
  < 0  negated offset from the local base; the next word is copied to that
       location (two words consumed).

TMP1 walks the records, TMP2 holds the current word / target address and
TMP3 holds the local base. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *next_record;
struct sljit_jump *not_positive;
struct sljit_jump *negative;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(TMP3, 0, 0);

next_record = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
not_positive = JUMP(SLJIT_C_SIG_LESS_EQUAL);

/* Positive word: restore two consecutive words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, next_record);

JUMPHERE(not_positive);
/* The flags of the comparison above are still in effect here. */
negative = JUMP(SLJIT_C_SIG_LESS);
/* Zero word: all records have been processed. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(negative);
/* Negative word: restore a single word. */
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, next_record);
}

#ifdef SUPPORT_UCP
/* Emits the Unicode word character test used twice by check_wordboundary().
On entry TMP1 holds the character. On exit TMP2 is 1 when the character is
an underscore, or when the value left in TMP1 by the getucd routine
(presumably the character type - confirm against getucd) lies in
ucp_Ll..ucp_Lu or in ucp_Nd..ucp_No; otherwise TMP2 is 0. TMP1 is
destroyed. */
static void emit_ucp_word_test(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *underscore;

OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
underscore = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
JUMPHERE(underscore);
}
#endif /* SUPPORT_UCP */

/* Emits the fast-call routine behind the word boundary assertions. The
"word" flag (0 or 1) of the character before STR_PTR is stored in LOCALS1
and the flag of the character at STR_PTR is computed into TMP2; a missing
neighbour counts as 0. The routine finishes by XOR-ing the two flags with
the equal/zero flag updated, so the caller can branch on the result. The
return address is kept in LOCALS0. */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *no_prev;
jump_list *no_next = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *too_big;
#endif

/* The table lookups below extract the flag with a shift by four. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);

/* Previous character: LOCALS1 stays 0 when STR_PTR is not after the
"begin" field of the JIT arguments. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
no_prev = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  emit_ucp_word_test(common);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
  }
else
#endif
  {
  /* Characters above 255 have no entry in the ctypes table; LOCALS1 has
  already been zeroed for them. */
#ifndef COMPILE_PCRE8
  too_big = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  too_big = common->utf ? CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255) : NULL;
#endif /* COMPILE_PCRE8 */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(too_big);
#elif defined SUPPORT_UTF
  if (too_big != NULL)
    JUMPHERE(too_big);
#endif /* COMPILE_PCRE8 */
  }
JUMPHERE(no_prev);

/* Current character: TMP2 stays 0 at the end of the subject. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &no_next);
peek_char(common, READ_CHAR_MAX);

#ifdef SUPPORT_UCP
if (common->use_ucp)
  emit_ucp_word_test(common);
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  too_big = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  too_big = common->utf ? CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255) : NULL;
#endif
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
  JUMPHERE(too_big);
#elif defined SUPPORT_UTF
  if (too_big != NULL)
    JUMPHERE(too_big);
#endif /* COMPILE_PCRE8 */
  }
set_jumps(no_next, LABEL());

OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}

/* Tries to test the character in TMP1 against a 256 bit class bitmap with
a few range comparisons instead of a table lookup.

The bitmap is first converted into the list of positions where membership
changes. A final boundary at 256 is appended when the state reached at the
end of the map differs from what nclass prescribes for larger values (set
nclass when values from 256 upwards are members).

  bits        the 32 byte bitmap
  nclass      see above
  invert      swaps the meaning of "member" for the generated tests
  backtracks  receives the jumps that are taken when the character is not
              a member (or, with invert, when it is a member)

Returns FALSE, without emitting anything, when more than four boundaries
(or more than MAX_RANGE_SIZE) would be needed. Returns TRUE otherwise; TMP1
may have been modified by the emitted code. */
static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
DEFINE_COMPILER;
int ranges[MAX_RANGE_SIZE];
int count = 0;
int pos, index, bias;
pcre_uint8 state, next, filler;

state = bits[0] & 0x1;
/* A byte made only of the current state: 0x00 or 0xff. */
filler = -state;

pos = 0;
while (pos < 256)
  {
  index = pos >> 3;
  /* A whole byte in the current state cannot contain a boundary. */
  if ((pos & 0x7) == 0 && bits[index] == filler)
    {
    pos += 8;
    continue;
    }

  next = (bits[index] >> (pos & 0x7)) & 0x1;
  if (next != state)
    {
    if (count >= MAX_RANGE_SIZE)
      return FALSE;
    ranges[count++] = pos;
    state = next;
    filler = -next;
    }
  pos++;
  }

if ((state == 0 && nclass) || (state == 1 && !nclass))
  {
  if (count >= MAX_RANGE_SIZE)
    return FALSE;
  ranges[count++] = 256;
  }

if (count < 0 || count > 4)
  return FALSE;

/* From here on "state" tells whether the values below the first boundary
pass the (possibly inverted) test. */
state = bits[0] & 0x1;
if (invert)
  state ^= 0x1;

/* No character is accepted. */
if (count == 0 && state == 0)
  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

switch(count)
  {
  case 0:
  /* When state != 0, all characters are accepted. */
  return TRUE;

  case 1:
  /* A single boundary: one comparison decides. */
  add_jump(compiler, backtracks, CMP(state == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  return TRUE;

  case 2:
  /* One interval [ranges[0], ranges[1]); a single value is tested by
  equality, a longer interval by an unsigned range check. */
  if (ranges[0] + 1 == ranges[1])
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
    add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
    }
  return TRUE;

  case 3:
  if (state != 0)
    {
    /* Rejected: everything from ranges[2], and [ranges[0], ranges[1]). */
    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    if (ranges[0] + 1 == ranges[1])
      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    return TRUE;
    }

  /* Rejected: everything below ranges[0], and [ranges[1], ranges[2]). */
  add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
  if (ranges[1] + 1 == ranges[2])
    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  return TRUE;

  case 4:
  /* Two intervals of the same length whose starts differ in exactly one
  bit: set that bit and test the upper interval only. */
  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
      && is_powerof2(ranges[2] - ranges[0]))
    {
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
    if (ranges[2] + 1 == ranges[3])
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    return TRUE;
    }

  if (state != 0)
    {
    /* Rejected: [ranges[0], ranges[1]) and [ranges[2], ranges[3]). "bias"
    is the amount already subtracted from TMP1 by the first test. */
    bias = 0;
    if (ranges[0] + 1 == ranges[1])
      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      bias = ranges[0];
      }

    if (ranges[2] + 1 == ranges[3])
      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - bias));
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - bias);
      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
      }
    return TRUE;
    }

  /* Rejected: everything outside [ranges[0], ranges[3]), and the gap
  [ranges[1], ranges[2]) inside it. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
  add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
  if (ranges[1] + 1 == ranges[2])
    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
  else
    {
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
    }
  return TRUE;

  default:
  SLJIT_ASSERT_STOP();
  return FALSE;
  }
}

/* Emits the fast-call routine that tests whether TMP1 is one of 0x0a..0x0d
or 0x85, and - on 16/32 bit builds, or when common->utf is set on 8 bit
builds with UTF support - 0x2028 or 0x2029. The result is accumulated in
TMP2 and the last operation also updates the equal/zero flag. TMP1 is
modified. */
static void check_anynewline(compiler_common *common)
{
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* After the subtraction 0x0a..0x0d becomes 0..3. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  /* Setting the lowest bit maps 0x2028 onto 0x2029 (both shifted by
  0x0a), so one comparison covers the pair. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}

/* Emits the fast-call routine that tests whether TMP1 is a horizontal
space: 0x09, 0x20 or 0xa0, and - on 16/32 bit builds, or when common->utf is
set on 8 bit builds with UTF support - 0x1680, 0x180e, 0x2000..0x200a,
0x202f, 0x205f or 0x3000. The result is accumulated in TMP2 and the last
operation also updates the equal/zero flag. TMP1 is modified when the wide
tests are emitted. */
static void check_hspace(compiler_common *common)
{
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Each comparison is folded into TMP2 by the OP_FLAGS that follows it. */
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  /* The remaining values are tested relative to 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}

/* Emits the fast-call routine that tests whether TMP1 is a vertical space:
0x0a..0x0d or 0x85, and - on 16/32 bit builds, or when common->utf is set on
8 bit builds with UTF support - 0x2028 or 0x2029. The result is accumulated
in TMP2 and the last operation also updates the equal/zero flag. TMP1 is
modified. */
static void check_vspace(compiler_common *common)
{
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* After the subtraction 0x0a..0x0d becomes 0..3. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  /* Setting the lowest bit maps 0x2028 onto 0x2029 (both shifted by
  0x0a), so one comparison covers the pair. */
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}

#define CHAR1 STR_END
#define CHAR2 STACK_TOP

4448static void do_casefulcmp(compiler_common *common) 4449{ 4450DEFINE_COMPILER; 4451struct sljit_jump *jump; 4452struct sljit_label *label; 4453 4454sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 4455OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 4456OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0); 4457OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0); 4458OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); 4459OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 4460 4461label = LABEL(); 4462OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1)); 4463OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 4464jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0); 4465OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); 4466JUMPTO(SLJIT_C_NOT_ZERO, label); 4467 4468JUMPHERE(jump); 4469OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 4470OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4471OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); 4472sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 4473} 4474 4475#define LCC_TABLE STACK_LIMIT 4476 4477static void do_caselesscmp(compiler_common *common) 4478{ 4479DEFINE_COMPILER; 4480struct sljit_jump *jump; 4481struct sljit_label *label; 4482 4483sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); 4484OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 4485 4486OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0); 4487OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0); 4488OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0); 4489OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc); 4490OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); 4491OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 4492 4493label = LABEL(); 4494OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1)); 4495OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); 4496#ifndef COMPILE_PCRE8 4497jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255); 4498#endif 4499OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0); 4500#ifndef COMPILE_PCRE8 4501JUMPHERE(jump); 4502jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255); 4503#endif 4504OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0); 4505#ifndef COMPILE_PCRE8 4506JUMPHERE(jump); 4507#endif 4508jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0); 4509OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); 4510JUMPTO(SLJIT_C_NOT_ZERO, label); 4511 4512JUMPHERE(jump); 4513OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); 4514OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0); 4515OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); 4516OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); 4517sljit_emit_fast_return(compiler, RETURN_ADDR, 0); 4518} 4519 4520#undef LCC_TABLE 4521#undef CHAR1 4522#undef CHAR2 4523 4524#if defined SUPPORT_UTF && defined SUPPORT_UCP 4525 4526static const pcre_uchar * SLJIT_CALL 
do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1) 4527{ 4528/* This function would be ineffective to do in JIT level. */ 4529pcre_uint32 c1, c2; 4530const pcre_uchar *src2 = args->uchar_ptr; 4531const pcre_uchar *end2 = args->end; 4532const ucd_record *ur; 4533const pcre_uint32 *pp; 4534 4535while (src1 < end1) 4536 { 4537 if (src2 >= end2) 4538 return (pcre_uchar*)1; 4539 GETCHARINC(c1, src1); 4540 GETCHARINC(c2, src2); 4541 ur = GET_UCD(c2); 4542 if (c1 != c2 && c1 != c2 + ur->other_case) 4543 { 4544 pp = PRIV(ucd_caseless_sets) + ur->caseset; 4545 for (;;) 4546 { 4547 if (c1 < *pp) return NULL; 4548 if (c1 == *pp++) break; 4549 } 4550 } 4551 } 4552return src2; 4553} 4554 4555#endif /* SUPPORT_UTF && SUPPORT_UCP */ 4556 4557static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc, 4558 compare_context* context, jump_list **backtracks) 4559{ 4560DEFINE_COMPILER; 4561unsigned int othercasebit = 0; 4562pcre_uchar *othercasechar = NULL; 4563#ifdef SUPPORT_UTF 4564int utflength; 4565#endif 4566 4567if (caseless && char_has_othercase(common, cc)) 4568 { 4569 othercasebit = char_get_othercase_bit(common, cc); 4570 SLJIT_ASSERT(othercasebit); 4571 /* Extracting bit difference info. */ 4572#if defined COMPILE_PCRE8 4573 othercasechar = cc + (othercasebit >> 8); 4574 othercasebit &= 0xff; 4575#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 4576 /* Note that this code only handles characters in the BMP. If there 4577 ever are characters outside the BMP whose othercase differs in only one 4578 bit from itself (there currently are none), this code will need to be 4579 revised for COMPILE_PCRE32. 
*/ 4580 othercasechar = cc + (othercasebit >> 9); 4581 if ((othercasebit & 0x100) != 0) 4582 othercasebit = (othercasebit & 0xff) << 8; 4583 else 4584 othercasebit &= 0xff; 4585#endif /* COMPILE_PCRE[8|16|32] */ 4586 } 4587 4588if (context->sourcereg == -1) 4589 { 4590#if defined COMPILE_PCRE8 4591#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED 4592 if (context->length >= 4) 4593 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); 4594 else if (context->length >= 2) 4595 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); 4596 else 4597#endif 4598 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); 4599#elif defined COMPILE_PCRE16 4600#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED 4601 if (context->length >= 4) 4602 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); 4603 else 4604#endif 4605 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); 4606#elif defined COMPILE_PCRE32 4607 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); 4608#endif /* COMPILE_PCRE[8|16|32] */ 4609 context->sourcereg = TMP2; 4610 } 4611 4612#ifdef SUPPORT_UTF 4613utflength = 1; 4614if (common->utf && HAS_EXTRALEN(*cc)) 4615 utflength += GET_EXTRALEN(*cc); 4616 4617do 4618 { 4619#endif 4620 4621 context->length -= IN_UCHARS(1); 4622#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16) 4623 4624 /* Unaligned read is supported. 
*/ 4625 if (othercasebit != 0 && othercasechar == cc) 4626 { 4627 context->c.asuchars[context->ucharptr] = *cc | othercasebit; 4628 context->oc.asuchars[context->ucharptr] = othercasebit; 4629 } 4630 else 4631 { 4632 context->c.asuchars[context->ucharptr] = *cc; 4633 context->oc.asuchars[context->ucharptr] = 0; 4634 } 4635 context->ucharptr++; 4636 4637#if defined COMPILE_PCRE8 4638 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1)) 4639#else 4640 if (context->ucharptr >= 2 || context->length == 0) 4641#endif 4642 { 4643 if (context->length >= 4) 4644 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); 4645 else if (context->length >= 2) 4646 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); 4647#if defined COMPILE_PCRE8 4648 else if (context->length >= 1) 4649 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); 4650#endif /* COMPILE_PCRE8 */ 4651 context->sourcereg = context->sourcereg == TMP1 ? 
TMP2 : TMP1; 4652 4653 switch(context->ucharptr) 4654 { 4655 case 4 / sizeof(pcre_uchar): 4656 if (context->oc.asint != 0) 4657 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint); 4658 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); 4659 break; 4660 4661 case 2 / sizeof(pcre_uchar): 4662 if (context->oc.asushort != 0) 4663 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); 4664 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); 4665 break; 4666 4667#ifdef COMPILE_PCRE8 4668 case 1: 4669 if (context->oc.asbyte != 0) 4670 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte); 4671 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); 4672 break; 4673#endif 4674 4675 default: 4676 SLJIT_ASSERT_STOP(); 4677 break; 4678 } 4679 context->ucharptr = 0; 4680 } 4681 4682#else 4683 4684 /* Unaligned read is unsupported or in 32 bit mode. */ 4685 if (context->length >= 1) 4686 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); 4687 4688 context->sourcereg = context->sourcereg == TMP1 ? 
TMP2 : TMP1; 4689 4690 if (othercasebit != 0 && othercasechar == cc) 4691 { 4692 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit); 4693 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); 4694 } 4695 else 4696 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); 4697 4698#endif 4699 4700 cc++; 4701#ifdef SUPPORT_UTF 4702 utflength--; 4703 } 4704while (utflength > 0); 4705#endif 4706 4707return cc; 4708} 4709 4710#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 4711 4712#define SET_TYPE_OFFSET(value) \ 4713 if ((value) != typeoffset) \ 4714 { \ 4715 if ((value) < typeoffset) \ 4716 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \ 4717 else \ 4718 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \ 4719 } \ 4720 typeoffset = (value); 4721 4722#define SET_CHAR_OFFSET(value) \ 4723 if ((value) != charoffset) \ 4724 { \ 4725 if ((value) < charoffset) \ 4726 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \ 4727 else \ 4728 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \ 4729 } \ 4730 charoffset = (value); 4731 4732static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks) 4733{ 4734DEFINE_COMPILER; 4735jump_list *found = NULL; 4736jump_list **list = (cc[0] & XCL_NOT) == 0 ? 
&found : backtracks; 4737sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX; 4738struct sljit_jump *jump = NULL; 4739pcre_uchar *ccbegin; 4740int compares, invertcmp, numberofcmps; 4741#if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16) 4742BOOL utf = common->utf; 4743#endif 4744 4745#ifdef SUPPORT_UCP 4746BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; 4747BOOL charsaved = FALSE; 4748int typereg = TMP1, scriptreg = TMP1; 4749const pcre_uint32 *other_cases; 4750sljit_uw typeoffset; 4751#endif 4752 4753/* Scanning the necessary info. */ 4754cc++; 4755ccbegin = cc; 4756compares = 0; 4757if (cc[-1] & XCL_MAP) 4758 { 4759 min = 0; 4760 cc += 32 / sizeof(pcre_uchar); 4761 } 4762 4763while (*cc != XCL_END) 4764 { 4765 compares++; 4766 if (*cc == XCL_SINGLE) 4767 { 4768 cc ++; 4769 GETCHARINCTEST(c, cc); 4770 if (c > max) max = c; 4771 if (c < min) min = c; 4772#ifdef SUPPORT_UCP 4773 needschar = TRUE; 4774#endif 4775 } 4776 else if (*cc == XCL_RANGE) 4777 { 4778 cc ++; 4779 GETCHARINCTEST(c, cc); 4780 if (c < min) min = c; 4781 GETCHARINCTEST(c, cc); 4782 if (c > max) max = c; 4783#ifdef SUPPORT_UCP 4784 needschar = TRUE; 4785#endif 4786 } 4787#ifdef SUPPORT_UCP 4788 else 4789 { 4790 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); 4791 cc++; 4792 if (*cc == PT_CLIST) 4793 { 4794 other_cases = PRIV(ucd_caseless_sets) + cc[1]; 4795 while (*other_cases != NOTACHAR) 4796 { 4797 if (*other_cases > max) max = *other_cases; 4798 if (*other_cases < min) min = *other_cases; 4799 other_cases++; 4800 } 4801 } 4802 else 4803 { 4804 max = READ_CHAR_MAX; 4805 min = 0; 4806 } 4807 4808 switch(*cc) 4809 { 4810 case PT_ANY: 4811 break; 4812 4813 case PT_LAMP: 4814 case PT_GC: 4815 case PT_PC: 4816 case PT_ALNUM: 4817 needstype = TRUE; 4818 break; 4819 4820 case PT_SC: 4821 needsscript = TRUE; 4822 break; 4823 4824 case PT_SPACE: 4825 case PT_PXSPACE: 4826 case PT_WORD: 4827 case PT_PXGRAPH: 4828 case PT_PXPRINT: 4829 case PT_PXPUNCT: 4830 
needstype = TRUE; 4831 needschar = TRUE; 4832 break; 4833 4834 case PT_CLIST: 4835 case PT_UCNC: 4836 needschar = TRUE; 4837 break; 4838 4839 default: 4840 SLJIT_ASSERT_STOP(); 4841 break; 4842 } 4843 cc += 2; 4844 } 4845#endif 4846 } 4847 4848/* We are not necessary in utf mode even in 8 bit mode. */ 4849cc = ccbegin; 4850detect_partial_match(common, backtracks); 4851read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0); 4852 4853if ((cc[-1] & XCL_HASPROP) == 0) 4854 { 4855 if ((cc[-1] & XCL_MAP) != 0) 4856 { 4857 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); 4858 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found)) 4859 { 4860 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); 4861 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); 4862 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); 4863 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); 4864 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); 4865 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO)); 4866 } 4867 4868 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); 4869 JUMPHERE(jump); 4870 4871 cc += 32 / sizeof(pcre_uchar); 4872 } 4873 else 4874 { 4875 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min); 4876 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? 
backtracks : &found, CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, max - min)); 4877 } 4878 } 4879else if ((cc[-1] & XCL_MAP) != 0) 4880 { 4881 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); 4882#ifdef SUPPORT_UCP 4883 charsaved = TRUE; 4884#endif 4885 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list)) 4886 { 4887#ifdef COMPILE_PCRE8 4888 SLJIT_ASSERT(common->utf); 4889#endif 4890 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); 4891 4892 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); 4893 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); 4894 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); 4895 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); 4896 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); 4897 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO)); 4898 4899 JUMPHERE(jump); 4900 } 4901 4902 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); 4903 cc += 32 / sizeof(pcre_uchar); 4904 } 4905 4906#ifdef SUPPORT_UCP 4907/* Simple register allocation. TMP1 is preferred if possible. */ 4908if (needstype || needsscript) 4909 { 4910 if (needschar && !charsaved) 4911 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); 4912 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); 4913 if (needschar) 4914 { 4915 if (needstype) 4916 { 4917 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); 4918 typereg = RETURN_ADDR; 4919 } 4920 4921 if (needsscript) 4922 scriptreg = TMP3; 4923 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); 4924 } 4925 else if (needstype && needsscript) 4926 scriptreg = TMP3; 4927 /* In all other cases only one of them was specified, and that can goes to TMP1. 
*/ 4928 4929 if (needsscript) 4930 { 4931 if (scriptreg == TMP1) 4932 { 4933 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); 4934 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3); 4935 } 4936 else 4937 { 4938 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); 4939 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); 4940 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0); 4941 } 4942 } 4943 } 4944#endif 4945 4946/* Generating code. */ 4947charoffset = 0; 4948numberofcmps = 0; 4949#ifdef SUPPORT_UCP 4950typeoffset = 0; 4951#endif 4952 4953while (*cc != XCL_END) 4954 { 4955 compares--; 4956 invertcmp = (compares == 0 && list != backtracks); 4957 jump = NULL; 4958 4959 if (*cc == XCL_SINGLE) 4960 { 4961 cc ++; 4962 GETCHARINCTEST(c, cc); 4963 4964 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) 4965 { 4966 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); 4967 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL); 4968 numberofcmps++; 4969 } 4970 else if (numberofcmps > 0) 4971 { 4972 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); 4973 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); 4974 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); 4975 numberofcmps = 0; 4976 } 4977 else 4978 { 4979 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); 4980 numberofcmps = 0; 4981 } 4982 } 4983 else if (*cc == XCL_RANGE) 4984 { 4985 cc ++; 4986 GETCHARINCTEST(c, cc); 4987 SET_CHAR_OFFSET(c); 4988 GETCHARINCTEST(c, cc); 4989 4990 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) 4991 { 4992 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); 4993 OP_FLAGS(numberofcmps == 0 ? 
SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL); 4994 numberofcmps++; 4995 } 4996 else if (numberofcmps > 0) 4997 { 4998 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); 4999 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); 5000 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); 5001 numberofcmps = 0; 5002 } 5003 else 5004 { 5005 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); 5006 numberofcmps = 0; 5007 } 5008 } 5009#ifdef SUPPORT_UCP 5010 else 5011 { 5012 if (*cc == XCL_NOTPROP) 5013 invertcmp ^= 0x1; 5014 cc++; 5015 switch(*cc) 5016 { 5017 case PT_ANY: 5018 if (list != backtracks) 5019 { 5020 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0)) 5021 continue; 5022 } 5023 else if (cc[-1] == XCL_NOTPROP) 5024 continue; 5025 jump = JUMP(SLJIT_JUMP); 5026 break; 5027 5028 case PT_LAMP: 5029 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset); 5030 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); 5031 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset); 5032 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); 5033 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset); 5034 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); 5035 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); 5036 break; 5037 5038 case PT_GC: 5039 c = PRIV(ucp_typerange)[(int)cc[1] * 2]; 5040 SET_TYPE_OFFSET(c); 5041 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c); 5042 break; 5043 5044 case PT_PC: 5045 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset); 5046 break; 5047 5048 case PT_SC: 5049 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]); 5050 break; 5051 5052 
case PT_SPACE: 5053 case PT_PXSPACE: 5054 SET_CHAR_OFFSET(9); 5055 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9); 5056 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); 5057 5058 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); 5059 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); 5060 5061 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); 5062 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); 5063 5064 SET_TYPE_OFFSET(ucp_Zl); 5065 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl); 5066 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); 5067 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); 5068 break; 5069 5070 case PT_WORD: 5071 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset)); 5072 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); 5073 /* Fall through. */ 5074 5075 case PT_ALNUM: 5076 SET_TYPE_OFFSET(ucp_Ll); 5077 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); 5078 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL); 5079 SET_TYPE_OFFSET(ucp_Nd); 5080 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd); 5081 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); 5082 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); 5083 break; 5084 5085 case PT_CLIST: 5086 other_cases = PRIV(ucd_caseless_sets) + cc[1]; 5087 5088 /* At least three characters are required. 5089 Otherwise this case would be handled by the normal code path. 
*/ 5090 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR); 5091 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]); 5092 5093 /* Optimizing character pairs, if their difference is power of 2. */ 5094 if (is_powerof2(other_cases[1] ^ other_cases[0])) 5095 { 5096 if (charoffset == 0) 5097 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); 5098 else 5099 { 5100 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); 5101 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); 5102 } 5103 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]); 5104 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); 5105 other_cases += 2; 5106 } 5107 else if (is_powerof2(other_cases[2] ^ other_cases[1])) 5108 { 5109 if (charoffset == 0) 5110 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]); 5111 else 5112 { 5113 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); 5114 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); 5115 } 5116 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]); 5117 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); 5118 5119 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset)); 5120 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? 
SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); 5121 5122 other_cases += 3; 5123 } 5124 else 5125 { 5126 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); 5127 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); 5128 } 5129 5130 while (*other_cases != NOTACHAR) 5131 { 5132 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); 5133 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); 5134 } 5135 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); 5136 break; 5137 5138 case PT_UCNC: 5139 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); 5140 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); 5141 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); 5142 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); 5143 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); 5144 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); 5145 5146 SET_CHAR_OFFSET(0xa0); 5147 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); 5148 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); 5149 SET_CHAR_OFFSET(0); 5150 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0); 5151 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL); 5152 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); 5153 break; 5154 5155 case PT_PXGRAPH: 5156 /* C and Z groups are the farthest two groups. 
*/ 5157 SET_TYPE_OFFSET(ucp_Ll); 5158 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); 5159 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER); 5160 5161 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); 5162 5163 /* In case of ucp_Cf, we overwrite the result. */ 5164 SET_CHAR_OFFSET(0x2066); 5165 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); 5166 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); 5167 5168 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); 5169 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); 5170 5171 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); 5172 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); 5173 5174 JUMPHERE(jump); 5175 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); 5176 break; 5177 5178 case PT_PXPRINT: 5179 /* C and Z groups are the farthest two groups. */ 5180 SET_TYPE_OFFSET(ucp_Ll); 5181 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); 5182 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER); 5183 5184 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); 5185 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL); 5186 5187 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); 5188 5189 /* In case of ucp_Cf, we overwrite the result. 
*/ 5190 SET_CHAR_OFFSET(0x2066); 5191 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); 5192 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); 5193 5194 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); 5195 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); 5196 5197 JUMPHERE(jump); 5198 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); 5199 break; 5200 5201 case PT_PXPUNCT: 5202 SET_TYPE_OFFSET(ucp_Sc); 5203 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc); 5204 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL); 5205 5206 SET_CHAR_OFFSET(0); 5207 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff); 5208 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); 5209 5210 SET_TYPE_OFFSET(ucp_Pc); 5211 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc); 5212 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL); 5213 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp); 5214 break; 5215 } 5216 cc += 2; 5217 } 5218#endif 5219 5220 if (jump != NULL) 5221 add_jump(compiler, compares > 0 ? 
list : backtracks, jump);
  }

if (found != NULL)
  set_jumps(found, LABEL());
}

#undef SET_TYPE_OFFSET
#undef SET_CHAR_OFFSET

#endif

/* Emits the matching path for one single-character opcode or zero-width
assertion. "type" selects the opcode and "cc" points at its operand data, i.e.
just behind the opcode itself. Every failing comparison is appended to
"backtracks". The return value is the pattern position following the operand
data consumed by the opcode; opcodes without operands return cc unchanged.
Falling out of the switch means the opcode is not handled here, which is
reported through SLJIT_ASSERT_STOP. */
static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[4];
jump_list *end_list;
#ifdef SUPPORT_UTF
struct sljit_label *label;
#ifdef SUPPORT_UCP
pcre_uchar propdata[5];
#endif
#endif /* SUPPORT_UTF */

switch(type)
  {
  case OP_SOD:
  /* Fails unless STR_PTR equals the "begin" field of jit_arguments. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_SOM:
  /* Fails unless STR_PTR equals the "str" field of jit_arguments. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  /* The fast call is queued on common->wordboundary; the conditional jump
  emitted right after it tests the flags that are live when the call returns. */
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
  return cc;

  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth optimizing them. */
  detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, type == OP_NOT_DIGIT);
  /* Flip the starting bit in the negative case. */
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  /* Same scheme as the digit case, testing the ctype_space bit. */
  detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, type == OP_NOT_WHITESPACE);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  /* Same scheme as the digit case, testing the ctype_word bit. */
  detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, type == OP_NOT_WORDCHAR);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
  return cc;

  case OP_ANY:
  /* Any character except a newline. A fixed newline wider than one code unit
  (common->newline > 255) is checked by hand: its high byte first, then the
  following code unit against its low byte. */
  detect_partial_match(common, backtracks);
  read_char_range(common, common->nlmin, common->nlmax, TRUE);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    end_list = NULL;
    if (common->mode != JIT_PARTIAL_HARD_COMPILE)
      add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  case OP_ALLANY:
  /* Any character. In UTF mode the trailing code units of a multi-unit
  character are skipped as well. */
  detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
#if defined COMPILE_PCRE8
    /* Lead bytes >= 0xc0: the number of extra bytes comes from utf8_table4. */
    jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif defined COMPILE_PCRE16
    /* A unit whose top six bits equal 0xd800 is followed by one more unit:
    the equality flag (0 or 1) is shifted into a byte offset of 0 or 2. */
    jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif
    JUMPHERE(jump[0]);
#endif /* COMPILE_PCRE[8|16] */
    return cc;
    }
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  case OP_ANYBYTE:
  /* Exactly one code unit, regardless of UTF mode. */
  detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
  case OP_NOTPROP:
  case OP_PROP:
  /* Wrap the two operand units in a minimal XCL_* item list and reuse the
  extended class compiler. */
  propdata[0] = XCL_HASPROP;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif
#endif

  case OP_ANYNL:
  /* CR followed by NL is consumed as a pair; a CR at the end of the subject
  or followed by anything else matches on its own. Every other character is
  classified by check_newlinechar using the bsr newline type. */
  detect_partial_match(common, backtracks);
  read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
  jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  end_list = NULL;
  if (common->mode != JIT_PARTIAL_HARD_COMPILE)
    add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  case OP_NOT_HSPACE:
  case OP_HSPACE:
  /* The routine queued on common->hspace is expected to report its verdict
  in the flags tested by the conditional jump that follows the call. */
  detect_partial_match(common, backtracks);
  read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
  return cc;

  case OP_NOT_VSPACE:
  case OP_VSPACE:
  /* Same scheme as the horizontal space case, using common->vspace. */
  detect_partial_match(common, backtracks);
  read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
  return cc;

#ifdef SUPPORT_UCP
  case OP_EXTUNI:
  /* Reads one character, then keeps consuming characters for as long as the
  ucp_gbtable entry selected by the previous character's gbprop value has the
  bit of the next character's gbprop value set. */
  detect_partial_match(common, backtracks);
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  /* Optimize register allocation: use a real register. STACK_TOP is parked
  in LOCALS0 and holds the previous gbprop value inside the loop. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  label = LABEL();
  jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  /* TMP3 remembers the position before the tentative read so that it can be
  restored when the next character does not continue the sequence. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
  OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
  OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  JUMPTO(SLJIT_C_NOT_ZERO, label);

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  JUMPHERE(jump[0]);
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  if (common->mode == JIT_PARTIAL_HARD_COMPILE)
    {
    jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  case OP_EODN:
  /* Requires rather complex checks. Succeeds at STR_END, or when what is
  left of the subject is exactly one newline sequence. */
  jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two-unit fixed newline. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == JIT_COMPILE)
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
    else
      {
      jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
      /* TMP2 becomes non-zero when more than two units remain or when the
      first unit is not the high byte of the newline. */
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* One-unit fixed newline: it must be the last unit of the subject. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    /* Non-fixed newline types: CR is handled first because it may be
    followed by NL. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
    jump[2] = JUMP(SLJIT_C_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
    /* Equal. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    JUMPHERE(jump[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* STR_PTR is saved in LOCALS1 around the read because this opcode
      must not move the subject pointer. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
      read_char_range(common, common->nlmin, common->nlmax, TRUE);
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
      }
    JUMPHERE(jump[2]);
    JUMPHERE(jump[3]);
    }
  JUMPHERE(jump[0]);
  check_partial(common, FALSE);
  return cc;

  case OP_EOD:
  /* Only the very end of the subject. */
  add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
  check_partial(common, FALSE);
  return cc;

  case OP_CIRC:
  /* Fails when STR_PTR is past "begin" or when the notbol flag is set. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  return cc;

  case OP_CIRCM:
  /* At "begin" the notbol flag decides. Anywhere else STR_PTR must be
  before STR_END and must be preceded by a newline. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Both units of the newline must lie at or after "begin" (TMP1). */
    OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    skip_char_back(common);
    read_char_range(common, common->nlmin, common->nlmax, TRUE);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_DOLL:
  /* Fails when noteol is set; otherwise behaves like OP_EODN, or like
  OP_EOD when common->endonly is set. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (!common->endonly)
    compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  return cc;

  case OP_DOLLM:
  /* At STR_END the noteol flag decides. Before it, the next character(s)
  must form a newline. */
  jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  check_partial(common, FALSE);
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == JIT_COMPILE)
      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
    else
      {
      jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    peek_char(common, common->nlmax);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_CHAR:
  case OP_CHARI:
  /* "length" is the number of code units occupied by the pattern character. */
  length = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif
  /* Outside the partial matching modes a character that is caseful, has no
  other case, or has a usable other-case bit is compared as raw code units
  after a single end-of-input check. */
  if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
    {
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }

  /* General path: read a whole character into TMP1 and compare it against
  the pattern character c and, for caseless matching, its other case oc. */
  detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;

  if (type == OP_CHAR || !char_has_othercase(common, cc))
    {
    read_char_range(common, c, c, FALSE);
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    return cc + length;
    }
  oc = char_othercase(common, c);
  read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
  bit = c ^ oc;
  if (is_powerof2(bit))
    {
    /* The two cases differ in a single bit: force that bit on and compare
    once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
    return cc + length;
    }
  jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
  JUMPHERE(jump[0]);
  return cc + length;

  case OP_NOT:
  case OP_NOTI:
  /* Any character other than the pattern character (and, for OP_NOTI, its
  other case). */
  detect_partial_match(common, backtracks);
  length = 1;
#ifdef SUPPORT_UTF
  if (common->utf)
    {
#ifdef COMPILE_PCRE8
    c = *cc;
    if (c < 128)
      {
      /* The excluded character is a single byte, so only the first byte of
      the subject character needs to be compared. */
      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* COMPILE_PCRE8 */
      {
      GETCHARLEN(c, cc, length);
      }
    }
  else
#endif /* SUPPORT_UTF */
    c = *cc;

  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char_range(common, c, c, TRUE);
    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    oc = char_othercase(common, c);
    read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  case OP_CLASS:
  case OP_NCLASS:
  /* The operand is a 32-byte bit map indexed by character value. */
  detect_partial_match(common, backtracks);

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  read_char_range(common, 0, bit, type == OP_NCLASS);
#else
  read_char_range(common, 0, 255, type == OP_NCLASS);
#endif

  /* check_class_ranges returning non-zero means the test has already been
  emitted in another form, so the bit map lookup below is not needed. */
  if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(pcre_uchar);

  /* Characters beyond the bit map fail for OP_CLASS and bypass the lookup
  (jump[0]) for OP_NCLASS. */
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  jump[0] = NULL;
  if (common->utf)
    {
    jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#elif !defined COMPILE_PCRE8
  jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jump[0]);
    jump[0] = NULL;
    }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

  /* Test bit (c & 7) of byte (c >> 3) of the map at cc. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif

  return cc + 32 / sizeof(pcre_uchar);

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  case OP_XCLASS:
  /* The item list starts behind the link field; the value stored in that
  field is used to step over the whole item. */
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif

  case OP_REVERSE:
  /* Moves STR_PTR back by "length" characters and fails when that would
  pass the "begin" field of jit_arguments. */
  length = GET(cc, 0);
  if (length == 0)
    return cc + LINK_SIZE;
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    /* Characters have variable width: step back one at a time, counting
    down in TMP2. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
    label = LABEL();
    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
    skip_char_back(common);
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_C_NOT_ZERO, label);
    }
  else
#endif
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
    }
  check_start_used_ptr(common);
  return cc + LINK_SIZE;
  }
SLJIT_ASSERT_STOP();
return cc;
}

/* Emits the matching path for the opcode at "cc". A run of consecutive
OP_CHAR / OP_CHARI opcodes is merged into one fixed-length comparison so that
a single end-of-input check covers the whole run. The run ends at "ccend", at
any other opcode, at an OP_CHARI whose character has an other case but for
which char_get_othercase_bit returns 0, or once context.length exceeds 128.
When no such run exists the opcode is handed to compile_char1_matchingpath.
Returns the pattern position after the compiled opcode(s). */
static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
{
/* This function consumes at least one input character. */
/* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
DEFINE_COMPILER;
pcre_uchar *ccbegin = cc;
compare_context context;
int size;

/* First pass: only measure the run; cc is rewound to ccbegin afterwards. */
context.length = 0;
do
  {
  if (cc >= ccend)
    break;

  if (*cc == OP_CHAR)
    {
    size = 1;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[1]))
      size += GET_EXTRALEN(cc[1]);
#endif
    }
  else if (*cc == OP_CHARI)
    {
    size = 1;
#ifdef SUPPORT_UTF
    if (common->utf)
      {
      if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
        size = 0;
      else if (HAS_EXTRALEN(cc[1]))
        size += GET_EXTRALEN(cc[1]);
      }
    else
#endif
    if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
      size = 0;
    }
  else
    size = 0;

  cc += 1 + size;
  context.length += IN_UCHARS(size);
  }
while (size > 0 && context.length <= 128);

cc = ccbegin;
if (context.length > 0)
  {
  /* We have a fixed-length byte sequence.
 */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
  add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));

  context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  context.ucharptr = 0;
#endif
  /* Each call compiles one opcode of the run; the loop ends when
  context.length has dropped to zero. */
  do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
  return cc;
  }

/* A non-fixed length character will be checked if length == 0. */
return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
}

/* Forward definitions. */
static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);

/* Allocates a zero-filled backtrack record of "size" bytes from the compiler's
memory, stores "ccstart" in it and makes it the new top of parent's list.
The macro expects the identifiers "compiler", "backtrack" and "parent" to be
in scope at the point of use, and makes the enclosing function return "error"
when the compiler is in an error state after the allocation. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for enclosing functions that return void. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the current "backtrack" pointer as a pointer to the given record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)

/* Emits the group lookup for a duplicate-named back reference (OP_DNREF or
OP_DNREFI at "cc"). The name-table entries belonging to the name are visited
in order. For each one TMP2 is set up for the entry's OVECTOR pair and the
generated code leaves the search as soon as the start value of that pair
differs from OVECTOR(1). For the last entry the comparison is inverted: a
start value equal to OVECTOR(1) jumps to "backtracks", unless "backtracks" is
NULL or jscript_compat is set. */
static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
/* The OVECTOR offset goes to TMP2.
*/ 5908DEFINE_COMPILER; 5909int count = GET2(cc, 1 + IMM2_SIZE); 5910pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size; 5911unsigned int offset; 5912jump_list *found = NULL; 5913 5914SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI); 5915 5916OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); 5917 5918count--; 5919while (count-- > 0) 5920 { 5921 offset = GET2(slot, 0) << 1; 5922 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); 5923 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0)); 5924 slot += common->name_entry_size; 5925 } 5926 5927offset = GET2(slot, 0) << 1; 5928GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)); 5929if (backtracks != NULL && !common->jscript_compat) 5930 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0)); 5931 5932set_jumps(found, LABEL()); 5933} 5934 5935static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail) 5936{ 5937DEFINE_COMPILER; 5938BOOL ref = (*cc == OP_REF || *cc == OP_REFI); 5939int offset = 0; 5940struct sljit_jump *jump = NULL; 5941struct sljit_jump *partial; 5942struct sljit_jump *nopartial; 5943 5944if (ref) 5945 { 5946 offset = GET2(cc, 1) << 1; 5947 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); 5948 /* OVECTOR(1) contains the "string begin - 1" constant. 
*/ 5949 if (withchecks && !common->jscript_compat) 5950 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); 5951 } 5952else 5953 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); 5954 5955#if defined SUPPORT_UTF && defined SUPPORT_UCP 5956if (common->utf && *cc == OP_REFI) 5957 { 5958 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2); 5959 if (ref) 5960 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); 5961 else 5962 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); 5963 5964 if (withchecks) 5965 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0); 5966 5967 /* Needed to save important temporary registers. */ 5968 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); 5969 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0); 5970 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0); 5971 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp)); 5972 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); 5973 if (common->mode == JIT_COMPILE) 5974 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1)); 5975 else 5976 { 5977 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); 5978 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); 5979 check_partial(common, FALSE); 5980 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); 5981 JUMPHERE(nopartial); 5982 } 5983 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); 5984 } 5985else 5986#endif /* SUPPORT_UTF && SUPPORT_UCP */ 5987 { 5988 if (ref) 5989 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0); 5990 else 5991 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0); 5992 5993 if (withchecks) 5994 jump = JUMP(SLJIT_C_ZERO); 5995 5996 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); 5997 partial = 
CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0); 5998 if (common->mode == JIT_COMPILE) 5999 add_jump(compiler, backtracks, partial); 6000 6001 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); 6002 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); 6003 6004 if (common->mode != JIT_COMPILE) 6005 { 6006 nopartial = JUMP(SLJIT_JUMP); 6007 JUMPHERE(partial); 6008 /* TMP2 -= STR_END - STR_PTR */ 6009 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0); 6010 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0); 6011 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0); 6012 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); 6013 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL)); 6014 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); 6015 JUMPHERE(partial); 6016 check_partial(common, FALSE); 6017 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); 6018 JUMPHERE(nopartial); 6019 } 6020 } 6021 6022if (jump != NULL) 6023 { 6024 if (emptyfail) 6025 add_jump(compiler, backtracks, jump); 6026 else 6027 JUMPHERE(jump); 6028 } 6029} 6030 6031static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) 6032{ 6033DEFINE_COMPILER; 6034BOOL ref = (*cc == OP_REF || *cc == OP_REFI); 6035backtrack_common *backtrack; 6036pcre_uchar type; 6037int offset = 0; 6038struct sljit_label *label; 6039struct sljit_jump *zerolength; 6040struct sljit_jump *jump = NULL; 6041pcre_uchar *ccbegin = cc; 6042int min = 0, max = 0; 6043BOOL minimize; 6044 6045PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); 6046 6047if (ref) 6048 offset = GET2(cc, 1) << 1; 6049else 6050 cc += IMM2_SIZE; 6051type = cc[1 + IMM2_SIZE]; 6052 6053SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even); 6054minimize = (type & 0x1) != 0; 6055switch(type) 6056 { 6057 case OP_CRSTAR: 6058 case OP_CRMINSTAR: 6059 
min = 0;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  max = 0;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  min = 0;
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;
  default:
  SLJIT_ASSERT_STOP();
  break;
  }

/* From here on max == 0 is treated as "no upper limit". */

if (!minimize)
  {
  /* Greedy repeat: match as many copies as possible, pushing the subject
  position reached after each optional copy. */
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat is
    zero, the invalid case is basically the same as an empty case. */
    if (ref)
      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      /* The OVECTOR address found by the search is kept in POSSESSIVE1 so
      that it can be reloaded at the top of the loop. */
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (ref)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (ref)
      {
      /* At least one copy is required, so a group whose start equals
      OVECTOR(1) fails immediately. */
      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 counts the copies matched so far; it is only maintained
  when a bound larger than one has to be enforced. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  label = LABEL();
  if (!ref)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: loop without pushing a position. */
    if (min > 1)
      CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
    if (max > 1)
      {
      /* Below the maximum: push the position and try one more copy. */
      jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, label);
      JUMPHERE(jump);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, label);
    }

  JUMPHERE(zerolength);
  BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* Minimizing repeat. STACK(0) holds the subject position (zero until the
first copy is attempted), STACK(1) is written as the copy counter and STACK(2)
keeps the OVECTOR address for the duplicate-name opcodes. */
allocate_stack(common, ref ? 2 : 3);
if (ref)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat is
  zero, the invalid case is basically the same as an empty case. */
  if (ref)
    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* Zero copies are tried first: skip the matching code emitted below. */
  jump = JUMP(SLJIT_JUMP);
  }
else
  {
  if (ref)
    {
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

/* The label stored in the backtrack record marks the code that matches one
more copy. */
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!ref)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Keep matching until the minimum count has been reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (jump != NULL)
  JUMPHERE(jump);
JUMPHERE(zerolength);

count_match(common);
return cc;
}

static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
recurse_entry *entry = common->entries;
recurse_entry *prev = NULL;
sljit_sw start = GET(cc, 1);
pcre_uchar *start_cc;
6246BOOL needs_control_head; 6247 6248PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL); 6249 6250/* Inlining simple patterns: when get_framesize() reports no_stack for the referenced group, its body is compiled in place and flagged through inlined_pattern instead of being called. */ 6251if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack) 6252 { 6253 start_cc = common->start + start; 6254 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack); 6255 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE; 6256 return cc + 1 + LINK_SIZE; 6257 } 6258 /* Search the recurse_entry list for this start offset; prev ends up as the last node visited so that a new entry can be appended. */ 6259while (entry != NULL) 6260 { 6261 if (entry->start == start) 6262 break; 6263 prev = entry; 6264 entry = entry->next; 6265 } 6266 /* No entry exists for this start offset: allocate one and link it after prev, or make it the list head when the list was empty. Returns NULL if the compiler reports an error after the allocation. */ 6267if (entry == NULL) 6268 { 6269 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry)); 6270 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 6271 return NULL; 6272 entry->next = NULL; 6273 entry->entry = NULL; 6274 entry->calls = NULL; 6275 entry->start = start; 6276 6277 if (prev != NULL) 6278 prev->next = entry; 6279 else 6280 common->entries = entry; 6281 } 6282 /* Push OVECTOR(0) and/or the mark pointer onto the stack before the call, depending on which of has_set_som and mark_ptr are in use. */ 6283if (common->has_set_som && common->mark_ptr != 0) 6284 { 6285 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); 6286 allocate_stack(common, 2); 6287 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); 6288 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); 6289 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); 6290 } 6291else if (common->has_set_som || common->mark_ptr != 0) 6292 { 6293 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr); 6294 allocate_stack(common, 1); 6295 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); 6296 } 6297 /* Emit the call: jump straight to entry->entry when that label already exists, otherwise record the jump in entry->calls. */ 6298if (entry->entry == NULL) 6299 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL)); 6300else 6301 JUMPTO(SLJIT_FAST_CALL, entry->entry); 6302/* Leave if the match is failed. 
*/ 6303add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0)); 6304return cc + 1 + LINK_SIZE; 6305} 6306 /* Helper invoked from the generated code (compile_callout_matchingpath emits a SLJIT_CALL3 to it) to run the user callout. Returns 0 without touching callout_block when no callout function is installed. Otherwise it completes callout_block, converts the JIT offset vector (pointers) into offsets relative to arguments->begin stored in arguments->offsets, and returns whatever the callout function returns. */ 6307static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector) 6308{ 6309const pcre_uchar *begin = arguments->begin; 6310int *offset_vector = arguments->offsets; 6311int offset_count = arguments->offset_count; 6312int i; 6313 6314if (PUBL(callout) == NULL) 6315 return 0; 6316 6317callout_block->version = 2; 6318callout_block->callout_data = arguments->callout_data; 6319 6320/* Offsets in subject. On entry the subject and offset_vector fields hold raw pointers stored by the generated code (the OVECTOR(0) value and STR_PTR respectively), so start_match and current_position must be computed before those two fields are overwritten below. */ 6321callout_block->subject_length = arguments->end - arguments->begin; 6322callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin; 6323callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin; 6324#if defined COMPILE_PCRE8 6325callout_block->subject = (PCRE_SPTR)begin; 6326#elif defined COMPILE_PCRE16 6327callout_block->subject = (PCRE_SPTR16)begin; 6328#elif defined COMPILE_PCRE32 6329callout_block->subject = (PCRE_SPTR32)begin; 6330#endif 6331 6332/* Convert and copy the JIT offset vector to the offset_vector array. The loop starts at index 2; entries 0 and 1 are set to -1 afterwards. capture_top first records the index of the last pair whose start pointer is not below begin and is then converted to (pair number + 1). */ 6333callout_block->capture_top = 0; 6334callout_block->offset_vector = offset_vector; 6335for (i = 2; i < offset_count; i += 2) 6336 { 6337 offset_vector[i] = jit_ovector[i] - begin; 6338 offset_vector[i + 1] = jit_ovector[i + 1] - begin; 6339 if (jit_ovector[i] >= begin) 6340 callout_block->capture_top = i; 6341 } 6342 6343callout_block->capture_top = (callout_block->capture_top >> 1) + 1; 6344if (offset_count > 0) 6345 offset_vector[0] = -1; 6346if (offset_count > 1) 6347 offset_vector[1] = -1; 6348return (*PUBL(callout))(callout_block); 6349} 6350 6351/* Aligning to 8 byte. 
*/ 6352#define CALLOUT_ARG_SIZE \ 6353 (((int)sizeof(PUBL(callout_block)) + 7) & ~7) 6354 /* Displacement of field arg measured from the end of the aligned callout block. It is negative and is applied to STACK_TOP once CALLOUT_ARG_SIZE bytes have been allocated. */ 6355#define CALLOUT_ARG_OFFSET(arg) \ 6356 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg)) 6357 /* Emits the matching path of a callout item: allocates CALLOUT_ARG_SIZE bytes on the stack, fills in a callout_block there (callout_number from cc[1], capture_last, pattern_position, next_item_length, mark), calls do_callout and releases the block again. A positive return value backtracks, a negative one takes the forced quit path, zero continues matching. Returns cc + 2 + 2 * LINK_SIZE. */ 6358static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) 6359{ 6360DEFINE_COMPILER; 6361backtrack_common *backtrack; 6362 6363PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); 6364 6365allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw)); 6366 6367OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); 6368OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 6369SLJIT_ASSERT(common->capture_last_ptr != 0); 6370OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]); 6371OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0); 6372 6373/* These pointer sized fields temporarily store internal variables: offset_vector receives STR_PTR and subject receives the OVECTOR(0) value. do_callout reads them before overwriting the fields with their real contents. */ 6374OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); 6375OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0); 6376OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0); 6377 /* The mark field is loaded from jit_arguments->mark_ptr (TMP1 holds ARGUMENTS) when mark_ptr != 0; otherwise the immediate 0 is stored. */ 6378if (common->mark_ptr != 0) 6379 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr)); 6380OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2)); 6381OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE)); 6382OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0); 6383 6384/* Needed to save important temporary registers. 
*/ 6385OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); 6386OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE); 6387GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START); 6388sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout)); 6389OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0); 6390OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); 6391free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw)); 6392 6393/* Check return value: a positive value backtracks, a negative value jumps to the forced quit path, zero falls through. */ 6394OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); 6395add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER)); 6396if (common->forced_quit_label == NULL) 6397 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS)); 6398else 6399 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label); 6400return cc + 2 + 2 * LINK_SIZE; 6401} 6402 6403#undef CALLOUT_ARG_SIZE 6404#undef CALLOUT_ARG_OFFSET 6405 /* Compiles an assertion whose opcode lies in the range OP_ASSERT..OP_ASSERTBACK_NOT, optionally preceded by OP_BRAZERO or OP_BRAMINZERO (only when conditional is FALSE). Failure jumps are collected in backtrack->condfailed when conditional is TRUE and in backtrack->common.topbacktracks otherwise. The accept, quit, then_trap and positive_assert state of common is saved on entry and restored before every return. Returns the updated code pointer, or NULL when the compiler reports an error. */ 6406static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional) 6407{ 6408DEFINE_COMPILER; 6409int framesize; 6410int extrasize; 6411BOOL needs_control_head; 6412int private_data_ptr; 6413backtrack_common altbacktrack; 6414pcre_uchar *ccbegin; 6415pcre_uchar opcode; 6416pcre_uchar bra = OP_BRA; 6417jump_list *tmp = NULL; 6418jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks; 6419jump_list **found; 6420/* Saving previous accept variables. 
*/ 6421BOOL save_local_exit = common->local_exit; 6422BOOL save_positive_assert = common->positive_assert; 6423then_trap_backtrack *save_then_trap = common->then_trap; 6424struct sljit_label *save_quit_label = common->quit_label; 6425struct sljit_label *save_accept_label = common->accept_label; 6426jump_list *save_quit = common->quit; 6427jump_list *save_positive_assert_quit = common->positive_assert_quit; 6428jump_list *save_accept = common->accept; 6429struct sljit_jump *jump; 6430struct sljit_jump *brajump = NULL; 6431 6432/* Assert captures then. */ 6433common->then_trap = NULL; 6434 6435if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) 6436 { 6437 SLJIT_ASSERT(!conditional); 6438 bra = *cc; 6439 cc++; 6440 } 6441private_data_ptr = PRIVATE_DATA(cc); 6442SLJIT_ASSERT(private_data_ptr != 0); 6443framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head); 6444backtrack->framesize = framesize; 6445backtrack->private_data_ptr = private_data_ptr; 6446opcode = *cc; 6447SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT); 6448found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target; 6449ccbegin = cc; 6450cc += GET(cc, 1); 6451 6452if (bra == OP_BRAMINZERO) 6453 { 6454 /* This is a braminzero backtrack path. */ 6455 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 6456 free_stack(common, 1); 6457 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); 6458 } 6459 6460if (framesize < 0) 6461 { 6462 extrasize = needs_control_head ? 
2 : 1; 6463 if (framesize == no_frame) 6464 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); 6465 allocate_stack(common, extrasize); 6466 if (needs_control_head) 6467 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 6468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 6469 if (needs_control_head) 6470 { 6471 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); 6472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); 6473 } 6474 } 6475else 6476 { 6477 extrasize = needs_control_head ? 3 : 2; 6478 allocate_stack(common, framesize + extrasize); 6479 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 6480 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw)); 6481 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); 6482 if (needs_control_head) 6483 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 6484 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 6485 if (needs_control_head) 6486 { 6487 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); 6488 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); 6489 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); 6490 } 6491 else 6492 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); 6493 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE); 6494 } 6495 6496memset(&altbacktrack, 0, sizeof(backtrack_common)); 6497if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) 6498 { 6499 /* Negative assert is stronger than positive assert. 
*/ 6500 common->local_exit = TRUE; 6501 common->quit_label = NULL; 6502 common->quit = NULL; 6503 common->positive_assert = FALSE; 6504 } 6505else 6506 common->positive_assert = TRUE; 6507common->positive_assert_quit = NULL; 6508 6509while (1) 6510 { 6511 common->accept_label = NULL; 6512 common->accept = NULL; 6513 altbacktrack.top = NULL; 6514 altbacktrack.topbacktracks = NULL; 6515 6516 if (*ccbegin == OP_ALT) 6517 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 6518 6519 altbacktrack.cc = ccbegin; 6520 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack); 6521 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 6522 { 6523 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) 6524 { 6525 common->local_exit = save_local_exit; 6526 common->quit_label = save_quit_label; 6527 common->quit = save_quit; 6528 } 6529 common->positive_assert = save_positive_assert; 6530 common->then_trap = save_then_trap; 6531 common->accept_label = save_accept_label; 6532 common->positive_assert_quit = save_positive_assert_quit; 6533 common->accept = save_accept; 6534 return NULL; 6535 } 6536 common->accept_label = LABEL(); 6537 if (common->accept != NULL) 6538 set_jumps(common->accept, common->accept_label); 6539 6540 /* Reset stack. */ 6541 if (framesize < 0) 6542 { 6543 if (framesize == no_frame) 6544 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 6545 else 6546 free_stack(common, extrasize); 6547 if (needs_control_head) 6548 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0); 6549 } 6550 else 6551 { 6552 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional) 6553 { 6554 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. 
*/ 6555 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); 6556 if (needs_control_head) 6557 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0); 6558 } 6559 else 6560 { 6561 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 6562 if (needs_control_head) 6563 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw)); 6564 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 6565 } 6566 } 6567 6568 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) 6569 { 6570 /* We know that STR_PTR was stored on the top of the stack. */ 6571 if (conditional) 6572 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0); 6573 else if (bra == OP_BRAZERO) 6574 { 6575 if (framesize < 0) 6576 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw)); 6577 else 6578 { 6579 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); 6580 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw)); 6581 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); 6582 } 6583 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); 6584 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6585 } 6586 else if (framesize >= 0) 6587 { 6588 /* For OP_BRA and OP_BRAMINZERO. 
*/ 6589 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); 6590 } 6591 } 6592 add_jump(compiler, found, JUMP(SLJIT_JUMP)); 6593 6594 compile_backtrackingpath(common, altbacktrack.top); 6595 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 6596 { 6597 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) 6598 { 6599 common->local_exit = save_local_exit; 6600 common->quit_label = save_quit_label; 6601 common->quit = save_quit; 6602 } 6603 common->positive_assert = save_positive_assert; 6604 common->then_trap = save_then_trap; 6605 common->accept_label = save_accept_label; 6606 common->positive_assert_quit = save_positive_assert_quit; 6607 common->accept = save_accept; 6608 return NULL; 6609 } 6610 set_jumps(altbacktrack.topbacktracks, LABEL()); 6611 6612 if (*cc != OP_ALT) 6613 break; 6614 6615 ccbegin = cc; 6616 cc += GET(cc, 1); 6617 } 6618 6619if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) 6620 { 6621 SLJIT_ASSERT(common->positive_assert_quit == NULL); 6622 /* Makes the check less complicated below. */ 6623 common->positive_assert_quit = common->quit; 6624 } 6625 6626/* None of them matched. 
*/ 6627if (common->positive_assert_quit != NULL) 6628 { 6629 jump = JUMP(SLJIT_JUMP); 6630 set_jumps(common->positive_assert_quit, LABEL()); 6631 SLJIT_ASSERT(framesize != no_stack); 6632 if (framesize < 0) 6633 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw)); 6634 else 6635 { 6636 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 6637 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 6638 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw)); 6639 } 6640 JUMPHERE(jump); 6641 } 6642 6643if (needs_control_head) 6644 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1)); 6645 6646if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) 6647 { 6648 /* Assert is failed. */ 6649 if (conditional || bra == OP_BRAZERO) 6650 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 6651 6652 if (framesize < 0) 6653 { 6654 /* The topmost item should be 0. */ 6655 if (bra == OP_BRAZERO) 6656 { 6657 if (extrasize == 2) 6658 free_stack(common, 1); 6659 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6660 } 6661 else 6662 free_stack(common, extrasize); 6663 } 6664 else 6665 { 6666 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1)); 6667 /* The topmost item should be 0. */ 6668 if (bra == OP_BRAZERO) 6669 { 6670 free_stack(common, framesize + extrasize - 1); 6671 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6672 } 6673 else 6674 free_stack(common, framesize + extrasize); 6675 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); 6676 } 6677 jump = JUMP(SLJIT_JUMP); 6678 if (bra != OP_BRAZERO) 6679 add_jump(compiler, target, jump); 6680 6681 /* Assert is successful. */ 6682 set_jumps(tmp, LABEL()); 6683 if (framesize < 0) 6684 { 6685 /* We know that STR_PTR was stored on the top of the stack. 
*/ 6686 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw)); 6687 /* Keep the STR_PTR on the top of the stack. */ 6688 if (bra == OP_BRAZERO) 6689 { 6690 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); 6691 if (extrasize == 2) 6692 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 6693 } 6694 else if (bra == OP_BRAMINZERO) 6695 { 6696 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); 6697 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6698 } 6699 } 6700 else 6701 { 6702 if (bra == OP_BRA) 6703 { 6704 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ 6705 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); 6706 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw)); 6707 } 6708 else 6709 { 6710 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ 6711 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw)); 6712 if (extrasize == 2) 6713 { 6714 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 6715 if (bra == OP_BRAMINZERO) 6716 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6717 } 6718 else 6719 { 6720 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0); 6721 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? 
STR_PTR : SLJIT_IMM, 0); 6722 } 6723 } 6724 } 6725 6726 if (bra == OP_BRAZERO) 6727 { 6728 backtrack->matchingpath = LABEL(); 6729 SET_LABEL(jump, backtrack->matchingpath); 6730 } 6731 else if (bra == OP_BRAMINZERO) 6732 { 6733 JUMPTO(SLJIT_JUMP, backtrack->matchingpath); 6734 JUMPHERE(brajump); 6735 if (framesize >= 0) 6736 { 6737 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 6738 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 6739 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw)); 6740 } 6741 set_jumps(backtrack->common.topbacktracks, LABEL()); 6742 } 6743 } 6744else 6745 { 6746 /* AssertNot is successful. */ 6747 if (framesize < 0) 6748 { 6749 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 6750 if (bra != OP_BRA) 6751 { 6752 if (extrasize == 2) 6753 free_stack(common, 1); 6754 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6755 } 6756 else 6757 free_stack(common, extrasize); 6758 } 6759 else 6760 { 6761 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 6762 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1)); 6763 /* The topmost item should be 0. 
*/ 6764 if (bra != OP_BRA) 6765 { 6766 free_stack(common, framesize + extrasize - 1); 6767 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 6768 } 6769 else 6770 free_stack(common, framesize + extrasize); 6771 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); 6772 } 6773 6774 if (bra == OP_BRAZERO) 6775 backtrack->matchingpath = LABEL(); 6776 else if (bra == OP_BRAMINZERO) 6777 { 6778 JUMPTO(SLJIT_JUMP, backtrack->matchingpath); 6779 JUMPHERE(brajump); 6780 } 6781 6782 if (bra != OP_BRA) 6783 { 6784 SLJIT_ASSERT(found == &backtrack->common.topbacktracks); 6785 set_jumps(backtrack->common.topbacktracks, LABEL()); 6786 backtrack->common.topbacktracks = NULL; 6787 } 6788 } 6789 /* Restore the compiler state that was saved on entry; quit related fields are only restored for negative assertions. */ 6790if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT) 6791 { 6792 common->local_exit = save_local_exit; 6793 common->quit_label = save_quit_label; 6794 common->quit = save_quit; 6795 } 6796common->positive_assert = save_positive_assert; 6797common->then_trap = save_then_trap; 6798common->accept_label = save_accept_label; 6799common->positive_assert_quit = save_positive_assert_quit; 6800common->accept = save_accept; 6801return cc + 1 + LINK_SIZE; 6802} 6803 /* Emits stack clean-up code selected by ket and framesize. For framesize < 0, STACK_TOP is reloaded from private_data_ptr (no_frame case) or the extra slots are released with free_stack. For framesize >= 0, STACK_TOP becomes the value stored at private_data_ptr plus (framesize + stacksize) words. When needs_control_head is set, a saved value is loaded from the stack into TMP1 and written to control_head_ptr. For OP_KETRMAX, TMP2 is loaded from the top stack slot; for OP_KETRMIN with framesize < 0, that slot is copied to private_data_ptr. NOTE(review): judging by the name this is shared by the OP_ONCE ending paths; confirm against the callers. */ 6804static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head) 6805{ 6806DEFINE_COMPILER; 6807int stacksize; 6808 6809if (framesize < 0) 6810 { 6811 if (framesize == no_frame) 6812 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 6813 else 6814 { 6815 stacksize = needs_control_head ? 1 : 0; 6816 if (ket != OP_KET || has_alternatives) 6817 stacksize++; 6818 free_stack(common, stacksize); 6819 } 6820 6821 if (needs_control_head) 6822 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0); 6823 6824 /* TMP2 which is set here used by OP_KETRMAX below. 
*/ 6825 if (ket == OP_KETRMAX) 6826 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0); 6827 else if (ket == OP_KETRMIN) 6828 { 6829 /* Move the STR_PTR to the private_data_ptr. */ 6830 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0); 6831 } 6832 } 6833else 6834 { 6835 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1; 6836 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw)); 6837 if (needs_control_head) 6838 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0); 6839 6840 if (ket == OP_KETRMAX) 6841 { 6842 /* TMP2, which is set here, is used by OP_KETRMAX below. */ 6843 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 6844 } 6845 } 6846if (needs_control_head) 6847 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0); 6848} 6849 /* Emits code that saves the previous capture state into stack slots starting at STACK(stacksize) and installs the new one. If capture_last_ptr is in use, its old value is stored on the stack and replaced by offset >> 1. If the bracket is not optimized (optimized_cbracket[offset >> 1] == 0), the old OVECTOR(offset) and OVECTOR(offset + 1) are stored on the stack, then OVECTOR(offset) is set from private_data_ptr and OVECTOR(offset + 1) from STR_PTR. Returns stacksize increased by the number of slots written. No allocate_stack call is made here, so the slots are presumably reserved by the caller. */ 6850static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr) 6851{ 6852DEFINE_COMPILER; 6853 6854if (common->capture_last_ptr != 0) 6855 { 6856 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); 6857 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); 6858 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); 6859 stacksize++; 6860 } 6861if (common->optimized_cbracket[offset >> 1] == 0) 6862 { 6863 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); 6864 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); 6865 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); 6866 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 6867 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0); 6868 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); 6869 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); 6870 stacksize += 2; 6871 } 6872return stacksize; 6873} 6874 6875/* 6876 Handling bracketed 
expressions is probably the most complex part. 6877 6878 Stack layout naming characters: 6879 S - Push the current STR_PTR 6880 0 - Push a 0 (NULL) 6881 A - Push the current STR_PTR. Needed for restoring the STR_PTR 6882 before the next alternative. Not pushed if there are no alternatives. 6883 M - Any values pushed by the current alternative. Can be empty, or anything. 6884 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack. 6885 L - Push the previous local (pointed to by localptr) to the stack 6886 () - optional values stored on the stack 6887 ()* - optional, can be stored multiple times 6888 6889 The following list shows the regular expression templates, their PCRE byte codes 6890 and stack layout supported by pcre-sljit. 6891 6892 (?:) OP_BRA | OP_KET A M 6893 () OP_CBRA | OP_KET C M 6894 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )* 6895 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )* 6896 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )* 6897 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )* 6898 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )* 6899 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )* 6900 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )* 6901 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )* 6902 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 ) 6903 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 ) 6904 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 ) 6905 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 ) 6906 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )* 6907 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )* 6908 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )* 6909 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )* 6910 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )* 6911 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )* 6912 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )* 6913 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )* 6914 6915 6916 Stack layout naming characters: 6917 A - Push the alternative index (starting from 0) on the stack. 
6918 Not pushed if there is no alternatives. 6919 M - Any values pushed by the current alternative. Can be empty, or anything. 6920 6921 The next list shows the possible content of a bracket: 6922 (|) OP_*BRA | OP_ALT ... M A 6923 (?()|) OP_*COND | OP_ALT M A 6924 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A 6925 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A 6926 Or nothing, if trace is unnecessary 6927*/ 6928 6929static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) 6930{ 6931DEFINE_COMPILER; 6932backtrack_common *backtrack; 6933pcre_uchar opcode; 6934int private_data_ptr = 0; 6935int offset = 0; 6936int i, stacksize; 6937int repeat_ptr = 0, repeat_length = 0; 6938int repeat_type = 0, repeat_count = 0; 6939pcre_uchar *ccbegin; 6940pcre_uchar *matchingpath; 6941pcre_uchar *slot; 6942pcre_uchar bra = OP_BRA; 6943pcre_uchar ket; 6944assert_backtrack *assert; 6945BOOL has_alternatives; 6946BOOL needs_control_head = FALSE; 6947struct sljit_jump *jump; 6948struct sljit_jump *skip; 6949struct sljit_label *rmax_label = NULL; 6950struct sljit_jump *braminzero = NULL; 6951 6952PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL); 6953 6954if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) 6955 { 6956 bra = *cc; 6957 cc++; 6958 opcode = *cc; 6959 } 6960 6961opcode = *cc; 6962ccbegin = cc; 6963matchingpath = bracketend(cc) - 1 - LINK_SIZE; 6964ket = *matchingpath; 6965if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0) 6966 { 6967 repeat_ptr = PRIVATE_DATA(matchingpath); 6968 repeat_length = PRIVATE_DATA(matchingpath + 1); 6969 repeat_type = PRIVATE_DATA(matchingpath + 2); 6970 repeat_count = PRIVATE_DATA(matchingpath + 3); 6971 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0); 6972 if (repeat_type == OP_UPTO) 6973 ket = OP_KETRMAX; 6974 if (repeat_type == OP_MINUPTO) 6975 ket = OP_KETRMIN; 6976 } 6977 6978if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF) 6979 { 
6980 /* Drop this bracket_backtrack. */ 6981 parent->top = backtrack->prev; 6982 return matchingpath + 1 + LINK_SIZE + repeat_length; 6983 } 6984 6985matchingpath = ccbegin + 1 + LINK_SIZE; 6986SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN); 6987SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX))); 6988cc += GET(cc, 1); 6989 6990has_alternatives = *cc == OP_ALT; 6991if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND)) 6992 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE; 6993 6994if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) 6995 opcode = OP_SCOND; 6996if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC)) 6997 opcode = OP_ONCE; 6998 6999if (opcode == OP_CBRA || opcode == OP_SCBRA) 7000 { 7001 /* Capturing brackets has a pre-allocated space. */ 7002 offset = GET2(ccbegin, 1 + LINK_SIZE); 7003 if (common->optimized_cbracket[offset] == 0) 7004 { 7005 private_data_ptr = OVECTOR_PRIV(offset); 7006 offset <<= 1; 7007 } 7008 else 7009 { 7010 offset <<= 1; 7011 private_data_ptr = OVECTOR(offset); 7012 } 7013 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; 7014 matchingpath += IMM2_SIZE; 7015 } 7016else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND) 7017 { 7018 /* Other brackets simply allocate the next entry. */ 7019 private_data_ptr = PRIVATE_DATA(ccbegin); 7020 SLJIT_ASSERT(private_data_ptr != 0); 7021 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; 7022 if (opcode == OP_ONCE) 7023 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head); 7024 } 7025 7026/* Instructions before the first alternative. 
*/ 7027stacksize = 0; 7028if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO)) 7029 stacksize++; 7030if (bra == OP_BRAZERO) 7031 stacksize++; 7032 7033if (stacksize > 0) 7034 allocate_stack(common, stacksize); 7035 7036stacksize = 0; 7037if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO)) 7038 { 7039 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); 7040 stacksize++; 7041 } 7042 7043if (bra == OP_BRAZERO) 7044 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); 7045 7046if (bra == OP_BRAMINZERO) 7047 { 7048 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */ 7049 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 7050 if (ket != OP_KETRMIN) 7051 { 7052 free_stack(common, 1); 7053 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); 7054 } 7055 else 7056 { 7057 if (opcode == OP_ONCE || opcode >= OP_SBRA) 7058 { 7059 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); 7060 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 7061 /* Nothing stored during the first run. */ 7062 skip = JUMP(SLJIT_JUMP); 7063 JUMPHERE(jump); 7064 /* Checking zero-length iteration. */ 7065 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) 7066 { 7067 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */ 7068 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7069 } 7070 else 7071 { 7072 /* Except when the whole stack frame must be saved. 
*/ 7073 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7074 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw)); 7075 } 7076 JUMPHERE(skip); 7077 } 7078 else 7079 { 7080 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); 7081 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 7082 JUMPHERE(jump); 7083 } 7084 } 7085 } 7086 7087if (repeat_type != 0) 7088 { 7089 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count); 7090 if (repeat_type == OP_EXACT) 7091 rmax_label = LABEL(); 7092 } 7093 7094if (ket == OP_KETRMIN) 7095 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL(); 7096 7097if (ket == OP_KETRMAX) 7098 { 7099 rmax_label = LABEL(); 7100 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0) 7101 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label; 7102 } 7103 7104/* Handling capturing brackets and alternatives. */ 7105if (opcode == OP_ONCE) 7106 { 7107 stacksize = 0; 7108 if (needs_control_head) 7109 { 7110 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 7111 stacksize++; 7112 } 7113 7114 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0) 7115 { 7116 /* Neither capturing brackets nor recursions are found in the block. 
*/ 7117 if (ket == OP_KETRMIN) 7118 { 7119 stacksize += 2; 7120 if (!needs_control_head) 7121 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7122 } 7123 else 7124 { 7125 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame) 7126 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); 7127 if (ket == OP_KETRMAX || has_alternatives) 7128 stacksize++; 7129 } 7130 7131 if (stacksize > 0) 7132 allocate_stack(common, stacksize); 7133 7134 stacksize = 0; 7135 if (needs_control_head) 7136 { 7137 stacksize++; 7138 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); 7139 } 7140 7141 if (ket == OP_KETRMIN) 7142 { 7143 if (needs_control_head) 7144 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7145 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); 7146 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame) 7147 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw)); 7148 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0); 7149 } 7150 else if (ket == OP_KETRMAX || has_alternatives) 7151 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); 7152 } 7153 else 7154 { 7155 if (ket != OP_KET || has_alternatives) 7156 stacksize++; 7157 7158 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1; 7159 allocate_stack(common, stacksize); 7160 7161 if (needs_control_head) 7162 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); 7163 7164 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7165 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); 7166 7167 stacksize = needs_control_head ? 
1 : 0; 7168 if (ket != OP_KET || has_alternatives) 7169 { 7170 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); 7171 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); 7172 stacksize++; 7173 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); 7174 } 7175 else 7176 { 7177 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); 7178 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0); 7179 } 7180 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE); 7181 } 7182 } 7183else if (opcode == OP_CBRA || opcode == OP_SCBRA) 7184 { 7185 /* Saving the previous values. */ 7186 if (common->optimized_cbracket[offset >> 1] != 0) 7187 { 7188 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset)); 7189 allocate_stack(common, 2); 7190 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7191 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); 7192 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); 7193 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); 7194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); 7195 } 7196 else 7197 { 7198 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7199 allocate_stack(common, 1); 7200 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); 7201 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); 7202 } 7203 } 7204else if (opcode == OP_SBRA || opcode == OP_SCOND) 7205 { 7206 /* Saving the previous value. */ 7207 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7208 allocate_stack(common, 1); 7209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); 7210 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); 7211 } 7212else if (has_alternatives) 7213 { 7214 /* Pushing the starting string pointer. 
*/ 7215 allocate_stack(common, 1); 7216 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 7217 } 7218 7219/* Generating code for the first alternative. */ 7220if (opcode == OP_COND || opcode == OP_SCOND) 7221 { 7222 if (*matchingpath == OP_CREF) 7223 { 7224 SLJIT_ASSERT(has_alternatives); 7225 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), 7226 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); 7227 matchingpath += 1 + IMM2_SIZE; 7228 } 7229 else if (*matchingpath == OP_DNCREF) 7230 { 7231 SLJIT_ASSERT(has_alternatives); 7232 7233 i = GET2(matchingpath, 1 + IMM2_SIZE); 7234 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; 7235 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); 7236 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); 7237 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0); 7238 slot += common->name_entry_size; 7239 i--; 7240 while (i-- > 0) 7241 { 7242 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0); 7243 OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0); 7244 slot += common->name_entry_size; 7245 } 7246 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); 7247 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_C_ZERO)); 7248 matchingpath += 1 + 2 * IMM2_SIZE; 7249 } 7250 else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) 7251 { 7252 /* Never has other case. 
*/ 7253 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL; 7254 SLJIT_ASSERT(!has_alternatives); 7255 7256 if (*matchingpath == OP_RREF) 7257 { 7258 stacksize = GET2(matchingpath, 1); 7259 if (common->currententry == NULL) 7260 stacksize = 0; 7261 else if (stacksize == RREF_ANY) 7262 stacksize = 1; 7263 else if (common->currententry->start == 0) 7264 stacksize = stacksize == 0; 7265 else 7266 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); 7267 7268 if (stacksize != 0) 7269 matchingpath += 1 + IMM2_SIZE; 7270 } 7271 else 7272 { 7273 if (common->currententry == NULL || common->currententry->start == 0) 7274 stacksize = 0; 7275 else 7276 { 7277 stacksize = GET2(matchingpath, 1 + IMM2_SIZE); 7278 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size; 7279 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE); 7280 while (stacksize > 0) 7281 { 7282 if ((int)GET2(slot, 0) == i) 7283 break; 7284 slot += common->name_entry_size; 7285 stacksize--; 7286 } 7287 } 7288 7289 if (stacksize != 0) 7290 matchingpath += 1 + 2 * IMM2_SIZE; 7291 } 7292 7293 /* The stacksize == 0 is a common "else" case. */ 7294 if (stacksize == 0) 7295 { 7296 if (*cc == OP_ALT) 7297 { 7298 matchingpath = cc + 1 + LINK_SIZE; 7299 cc += GET(cc, 1); 7300 } 7301 else 7302 matchingpath = cc; 7303 } 7304 } 7305 else 7306 { 7307 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT); 7308 /* Similar code as PUSH_BACKTRACK macro. 
*/ 7309 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack)); 7310 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 7311 return NULL; 7312 memset(assert, 0, sizeof(assert_backtrack)); 7313 assert->common.cc = matchingpath; 7314 BACKTRACK_AS(bracket_backtrack)->u.assert = assert; 7315 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE); 7316 } 7317 } 7318 7319compile_matchingpath(common, matchingpath, cc, backtrack); 7320if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 7321 return NULL; 7322 7323if (opcode == OP_ONCE) 7324 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); 7325 7326stacksize = 0; 7327if (repeat_type == OP_MINUPTO) 7328 { 7329 /* We need to preserve the counter. TMP2 will be used below. */ 7330 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); 7331 stacksize++; 7332 } 7333if (ket != OP_KET || bra != OP_BRA) 7334 stacksize++; 7335if (offset != 0) 7336 { 7337 if (common->capture_last_ptr != 0) 7338 stacksize++; 7339 if (common->optimized_cbracket[offset >> 1] == 0) 7340 stacksize += 2; 7341 } 7342if (has_alternatives && opcode != OP_ONCE) 7343 stacksize++; 7344 7345if (stacksize > 0) 7346 allocate_stack(common, stacksize); 7347 7348stacksize = 0; 7349if (repeat_type == OP_MINUPTO) 7350 { 7351 /* TMP2 was set above. 
*/ 7352 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1); 7353 stacksize++; 7354 } 7355 7356if (ket != OP_KET || bra != OP_BRA) 7357 { 7358 if (ket != OP_KET) 7359 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); 7360 else 7361 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); 7362 stacksize++; 7363 } 7364 7365if (offset != 0) 7366 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); 7367 7368if (has_alternatives) 7369 { 7370 if (opcode != OP_ONCE) 7371 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); 7372 if (ket != OP_KETRMAX) 7373 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); 7374 } 7375 7376/* Must be after the matchingpath label. */ 7377if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0) 7378 { 7379 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); 7380 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); 7381 } 7382 7383if (ket == OP_KETRMAX) 7384 { 7385 if (repeat_type != 0) 7386 { 7387 if (has_alternatives) 7388 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); 7389 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); 7390 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label); 7391 /* Drop STR_PTR for greedy plus quantifier. */ 7392 if (opcode != OP_ONCE) 7393 free_stack(common, 1); 7394 } 7395 else if (opcode == OP_ONCE || opcode >= OP_SBRA) 7396 { 7397 if (has_alternatives) 7398 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); 7399 /* Checking zero-length iteration. */ 7400 if (opcode != OP_ONCE) 7401 { 7402 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label); 7403 /* Drop STR_PTR for greedy plus quantifier. */ 7404 if (bra != OP_BRAZERO) 7405 free_stack(common, 1); 7406 } 7407 else 7408 /* TMP2 must contain the starting STR_PTR. 
*/ 7409 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label); 7410 } 7411 else 7412 JUMPTO(SLJIT_JUMP, rmax_label); 7413 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL(); 7414 } 7415 7416if (repeat_type == OP_EXACT) 7417 { 7418 count_match(common); 7419 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); 7420 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label); 7421 } 7422else if (repeat_type == OP_UPTO) 7423 { 7424 /* We need to preserve the counter. */ 7425 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); 7426 allocate_stack(common, 1); 7427 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); 7428 } 7429 7430if (bra == OP_BRAZERO) 7431 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL(); 7432 7433if (bra == OP_BRAMINZERO) 7434 { 7435 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */ 7436 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath); 7437 if (braminzero != NULL) 7438 { 7439 JUMPHERE(braminzero); 7440 /* We need to release the end pointer to perform the 7441 backtrack for the zero-length iteration. When 7442 framesize is < 0, OP_ONCE will do the release itself. */ 7443 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0) 7444 { 7445 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7446 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 7447 } 7448 else if (ket == OP_KETRMIN && opcode != OP_ONCE) 7449 free_stack(common, 1); 7450 } 7451 /* Continue to the normal backtrack. */ 7452 } 7453 7454if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO) 7455 count_match(common); 7456 7457/* Skip the other alternatives. */ 7458while (*cc == OP_ALT) 7459 cc += GET(cc, 1); 7460cc += 1 + LINK_SIZE; 7461 7462/* Temporarily encoding the needs_control_head in framesize. 
*/ 7463if (opcode == OP_ONCE) 7464 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0); 7465return cc + repeat_length; 7466} 7467 7468static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) 7469{ 7470DEFINE_COMPILER; 7471backtrack_common *backtrack; 7472pcre_uchar opcode; 7473int private_data_ptr; 7474int cbraprivptr = 0; 7475BOOL needs_control_head; 7476int framesize; 7477int stacksize; 7478int offset = 0; 7479BOOL zero = FALSE; 7480pcre_uchar *ccbegin = NULL; 7481int stack; /* Also contains the offset of control head. */ 7482struct sljit_label *loop = NULL; 7483struct jump_list *emptymatch = NULL; 7484 7485PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL); 7486if (*cc == OP_BRAPOSZERO) 7487 { 7488 zero = TRUE; 7489 cc++; 7490 } 7491 7492opcode = *cc; 7493private_data_ptr = PRIVATE_DATA(cc); 7494SLJIT_ASSERT(private_data_ptr != 0); 7495BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr; 7496switch(opcode) 7497 { 7498 case OP_BRAPOS: 7499 case OP_SBRAPOS: 7500 ccbegin = cc + 1 + LINK_SIZE; 7501 break; 7502 7503 case OP_CBRAPOS: 7504 case OP_SCBRAPOS: 7505 offset = GET2(cc, 1 + LINK_SIZE); 7506 /* This case cannot be optimized in the same was as 7507 normal capturing brackets. 
*/ 7508 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0); 7509 cbraprivptr = OVECTOR_PRIV(offset); 7510 offset <<= 1; 7511 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE; 7512 break; 7513 7514 default: 7515 SLJIT_ASSERT_STOP(); 7516 break; 7517 } 7518 7519framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head); 7520BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize; 7521if (framesize < 0) 7522 { 7523 if (offset != 0) 7524 { 7525 stacksize = 2; 7526 if (common->capture_last_ptr != 0) 7527 stacksize++; 7528 } 7529 else 7530 stacksize = 1; 7531 7532 if (needs_control_head) 7533 stacksize++; 7534 if (!zero) 7535 stacksize++; 7536 7537 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; 7538 allocate_stack(common, stacksize); 7539 if (framesize == no_frame) 7540 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0); 7541 7542 stack = 0; 7543 if (offset != 0) 7544 { 7545 stack = 2; 7546 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); 7547 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); 7548 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); 7549 if (common->capture_last_ptr != 0) 7550 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); 7551 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); 7552 if (needs_control_head) 7553 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 7554 if (common->capture_last_ptr != 0) 7555 { 7556 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); 7557 stack = 3; 7558 } 7559 } 7560 else 7561 { 7562 if (needs_control_head) 7563 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 7564 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 7565 stack = 1; 7566 } 7567 7568 if (needs_control_head) 7569 stack++; 7570 if (!zero) 7571 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1); 7572 if (needs_control_head) 7573 { 7574 stack--; 7575 OP1(SLJIT_MOV, 
SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0); 7576 } 7577 } 7578else 7579 { 7580 stacksize = framesize + 1; 7581 if (!zero) 7582 stacksize++; 7583 if (needs_control_head) 7584 stacksize++; 7585 if (offset == 0) 7586 stacksize++; 7587 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize; 7588 7589 allocate_stack(common, stacksize); 7590 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7591 if (needs_control_head) 7592 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 7593 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1)); 7594 7595 stack = 0; 7596 if (!zero) 7597 { 7598 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1); 7599 stack = 1; 7600 } 7601 if (needs_control_head) 7602 { 7603 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0); 7604 stack++; 7605 } 7606 if (offset == 0) 7607 { 7608 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0); 7609 stack++; 7610 } 7611 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0); 7612 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE); 7613 stack -= 1 + (offset == 0); 7614 } 7615 7616if (offset != 0) 7617 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); 7618 7619loop = LABEL(); 7620while (*cc != OP_KETRPOS) 7621 { 7622 backtrack->top = NULL; 7623 backtrack->topbacktracks = NULL; 7624 cc += GET(cc, 1); 7625 7626 compile_matchingpath(common, ccbegin, cc, backtrack); 7627 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 7628 return NULL; 7629 7630 if (framesize < 0) 7631 { 7632 if (framesize == no_frame) 7633 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7634 7635 if (offset != 0) 7636 { 7637 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); 7638 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); 7639 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); 7640 if (common->capture_last_ptr 
!= 0) 7641 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); 7642 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); 7643 } 7644 else 7645 { 7646 if (opcode == OP_SBRAPOS) 7647 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 7648 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 7649 } 7650 7651 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) 7652 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0)); 7653 7654 if (!zero) 7655 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); 7656 } 7657 else 7658 { 7659 if (offset != 0) 7660 { 7661 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw)); 7662 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); 7663 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); 7664 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0); 7665 if (common->capture_last_ptr != 0) 7666 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1); 7667 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); 7668 } 7669 else 7670 { 7671 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7672 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw)); 7673 if (opcode == OP_SBRAPOS) 7674 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw)); 7675 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0); 7676 } 7677 7678 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS) 7679 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0)); 7680 7681 if (!zero) 7682 { 7683 if (framesize < 0) 7684 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0); 7685 else 7686 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 7687 } 7688 } 7689 7690 if (needs_control_head) 7691 OP1(SLJIT_MOV, 
SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack)); 7692 7693 JUMPTO(SLJIT_JUMP, loop); 7694 flush_stubs(common); 7695 7696 compile_backtrackingpath(common, backtrack->top); 7697 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 7698 return NULL; 7699 set_jumps(backtrack->topbacktracks, LABEL()); 7700 7701 if (framesize < 0) 7702 { 7703 if (offset != 0) 7704 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); 7705 else 7706 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 7707 } 7708 else 7709 { 7710 if (offset != 0) 7711 { 7712 /* Last alternative. */ 7713 if (*cc == OP_KETRPOS) 7714 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7715 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr); 7716 } 7717 else 7718 { 7719 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 7720 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw)); 7721 } 7722 } 7723 7724 if (*cc == OP_KETRPOS) 7725 break; 7726 ccbegin = cc + 1 + LINK_SIZE; 7727 } 7728 7729/* We don't have to restore the control head in case of a failed match. */ 7730 7731backtrack->topbacktracks = NULL; 7732if (!zero) 7733 { 7734 if (framesize < 0) 7735 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); 7736 else /* TMP2 is set to [private_data_ptr] above. */ 7737 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0)); 7738 } 7739 7740/* None of them matched. 
*/ 7741set_jumps(emptymatch, LABEL()); 7742count_match(common); 7743return cc + 1 + LINK_SIZE; 7744} 7745 7746static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end) 7747{ 7748int class_len; 7749 7750*opcode = *cc; 7751if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO) 7752 { 7753 cc++; 7754 *type = OP_CHAR; 7755 } 7756else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI) 7757 { 7758 cc++; 7759 *type = OP_CHARI; 7760 *opcode -= OP_STARI - OP_STAR; 7761 } 7762else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO) 7763 { 7764 cc++; 7765 *type = OP_NOT; 7766 *opcode -= OP_NOTSTAR - OP_STAR; 7767 } 7768else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI) 7769 { 7770 cc++; 7771 *type = OP_NOTI; 7772 *opcode -= OP_NOTSTARI - OP_STAR; 7773 } 7774else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO) 7775 { 7776 cc++; 7777 *opcode -= OP_TYPESTAR - OP_STAR; 7778 *type = 0; 7779 } 7780else 7781 { 7782 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS); 7783 *type = *opcode; 7784 cc++; 7785 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0); 7786 *opcode = cc[class_len - 1]; 7787 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY) 7788 { 7789 *opcode -= OP_CRSTAR - OP_STAR; 7790 if (end != NULL) 7791 *end = cc + class_len; 7792 } 7793 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY) 7794 { 7795 *opcode -= OP_CRPOSSTAR - OP_POSSTAR; 7796 if (end != NULL) 7797 *end = cc + class_len; 7798 } 7799 else 7800 { 7801 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE); 7802 *max = GET2(cc, (class_len + IMM2_SIZE)); 7803 *min = GET2(cc, class_len); 7804 7805 if (*min == 0) 7806 { 7807 SLJIT_ASSERT(*max != 0); 7808 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? 
OP_MINUPTO : OP_POSUPTO); 7809 } 7810 if (*max == *min) 7811 *opcode = OP_EXACT; 7812 7813 if (end != NULL) 7814 *end = cc + class_len + 2 * IMM2_SIZE; 7815 } 7816 return cc; 7817 } 7818 7819if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO) 7820 { 7821 *max = GET2(cc, 0); 7822 cc += IMM2_SIZE; 7823 } 7824 7825if (*type == 0) 7826 { 7827 *type = *cc; 7828 if (end != NULL) 7829 *end = next_opcode(common, cc); 7830 cc++; 7831 return cc; 7832 } 7833 7834if (end != NULL) 7835 { 7836 *end = cc + 1; 7837#ifdef SUPPORT_UTF 7838 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc); 7839#endif 7840 } 7841return cc; 7842} 7843 7844static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) 7845{ 7846DEFINE_COMPILER; 7847backtrack_common *backtrack; 7848pcre_uchar opcode; 7849pcre_uchar type; 7850int max = -1, min = -1; 7851pcre_uchar* end; 7852jump_list *nomatch = NULL; 7853struct sljit_jump *jump = NULL; 7854struct sljit_label *label; 7855int private_data_ptr = PRIVATE_DATA(cc); 7856int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); 7857int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; 7858int offset1 = (private_data_ptr == 0) ? 
STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); 7859int tmp_base, tmp_offset; 7860 7861PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL); 7862 7863cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end); 7864 7865switch(type) 7866 { 7867 case OP_NOT_DIGIT: 7868 case OP_DIGIT: 7869 case OP_NOT_WHITESPACE: 7870 case OP_WHITESPACE: 7871 case OP_NOT_WORDCHAR: 7872 case OP_WORDCHAR: 7873 case OP_ANY: 7874 case OP_ALLANY: 7875 case OP_ANYBYTE: 7876 case OP_ANYNL: 7877 case OP_NOT_HSPACE: 7878 case OP_HSPACE: 7879 case OP_NOT_VSPACE: 7880 case OP_VSPACE: 7881 case OP_CHAR: 7882 case OP_CHARI: 7883 case OP_NOT: 7884 case OP_NOTI: 7885 case OP_CLASS: 7886 case OP_NCLASS: 7887 tmp_base = TMP3; 7888 tmp_offset = 0; 7889 break; 7890 7891 default: 7892 SLJIT_ASSERT_STOP(); 7893 /* Fall through. */ 7894 7895 case OP_EXTUNI: 7896 case OP_XCLASS: 7897 case OP_NOTPROP: 7898 case OP_PROP: 7899 tmp_base = SLJIT_MEM1(SLJIT_SP); 7900 tmp_offset = POSSESSIVE0; 7901 break; 7902 } 7903 7904switch(opcode) 7905 { 7906 case OP_STAR: 7907 case OP_PLUS: 7908 case OP_UPTO: 7909 case OP_CRRANGE: 7910 if (type == OP_ANYNL || type == OP_EXTUNI) 7911 { 7912 SLJIT_ASSERT(private_data_ptr == 0); 7913 if (opcode == OP_STAR || opcode == OP_UPTO) 7914 { 7915 allocate_stack(common, 2); 7916 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 7917 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); 7918 } 7919 else 7920 { 7921 allocate_stack(common, 1); 7922 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 7923 } 7924 7925 if (opcode == OP_UPTO || opcode == OP_CRRANGE) 7926 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0); 7927 7928 label = LABEL(); 7929 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); 7930 if (opcode == OP_UPTO || opcode == OP_CRRANGE) 7931 { 7932 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); 7933 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); 7934 if (opcode == OP_CRRANGE 
&& min > 0) 7935 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label); 7936 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0)) 7937 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max); 7938 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); 7939 } 7940 7941 /* We cannot use TMP3 because of this allocate_stack. */ 7942 allocate_stack(common, 1); 7943 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 7944 JUMPTO(SLJIT_JUMP, label); 7945 if (jump != NULL) 7946 JUMPHERE(jump); 7947 } 7948 else 7949 { 7950 if (opcode == OP_PLUS) 7951 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); 7952 if (private_data_ptr == 0) 7953 allocate_stack(common, 2); 7954 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); 7955 if (opcode <= OP_PLUS) 7956 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); 7957 else 7958 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1); 7959 label = LABEL(); 7960 compile_char1_matchingpath(common, type, cc, &nomatch); 7961 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); 7962 if (opcode <= OP_PLUS) 7963 JUMPTO(SLJIT_JUMP, label); 7964 else if (opcode == OP_CRRANGE && max == 0) 7965 { 7966 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1); 7967 JUMPTO(SLJIT_JUMP, label); 7968 } 7969 else 7970 { 7971 OP1(SLJIT_MOV, TMP1, 0, base, offset1); 7972 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); 7973 OP1(SLJIT_MOV, base, offset1, TMP1, 0); 7974 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 1, label); 7975 } 7976 set_jumps(nomatch, LABEL()); 7977 if (opcode == OP_CRRANGE) 7978 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, min + 1)); 7979 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 7980 } 7981 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); 7982 break; 7983 7984 case OP_MINSTAR: 7985 case OP_MINPLUS: 7986 if (opcode == OP_MINPLUS) 7987 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); 7988 if (private_data_ptr == 0) 7989 allocate_stack(common, 1); 7990 
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); 7991 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); 7992 break; 7993 7994 case OP_MINUPTO: 7995 case OP_CRMINRANGE: 7996 if (private_data_ptr == 0) 7997 allocate_stack(common, 2); 7998 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); 7999 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1); 8000 if (opcode == OP_CRMINRANGE) 8001 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); 8002 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); 8003 break; 8004 8005 case OP_QUERY: 8006 case OP_MINQUERY: 8007 if (private_data_ptr == 0) 8008 allocate_stack(common, 1); 8009 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); 8010 if (opcode == OP_QUERY) 8011 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); 8012 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL(); 8013 break; 8014 8015 case OP_EXACT: 8016 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); 8017 label = LABEL(); 8018 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); 8019 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); 8020 JUMPTO(SLJIT_C_NOT_ZERO, label); 8021 break; 8022 8023 case OP_POSSTAR: 8024 case OP_POSPLUS: 8025 case OP_POSUPTO: 8026 if (opcode == OP_POSPLUS) 8027 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); 8028 if (opcode == OP_POSUPTO) 8029 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max); 8030 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); 8031 label = LABEL(); 8032 compile_char1_matchingpath(common, type, cc, &nomatch); 8033 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); 8034 if (opcode != OP_POSUPTO) 8035 JUMPTO(SLJIT_JUMP, label); 8036 else 8037 { 8038 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1); 8039 JUMPTO(SLJIT_C_NOT_ZERO, label); 8040 } 8041 set_jumps(nomatch, LABEL()); 8042 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); 8043 
break; 8044 8045 case OP_POSQUERY: 8046 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); 8047 compile_char1_matchingpath(common, type, cc, &nomatch); 8048 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); 8049 set_jumps(nomatch, LABEL()); 8050 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); 8051 break; 8052 8053 case OP_CRPOSRANGE: 8054 /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */ 8055 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min); 8056 label = LABEL(); 8057 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks); 8058 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); 8059 JUMPTO(SLJIT_C_NOT_ZERO, label); 8060 8061 if (max != 0) 8062 { 8063 SLJIT_ASSERT(max - min > 0); 8064 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, max - min); 8065 } 8066 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); 8067 label = LABEL(); 8068 compile_char1_matchingpath(common, type, cc, &nomatch); 8069 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); 8070 if (max == 0) 8071 JUMPTO(SLJIT_JUMP, label); 8072 else 8073 { 8074 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, SLJIT_IMM, 1); 8075 JUMPTO(SLJIT_C_NOT_ZERO, label); 8076 } 8077 set_jumps(nomatch, LABEL()); 8078 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); 8079 break; 8080 8081 default: 8082 SLJIT_ASSERT_STOP(); 8083 break; 8084 } 8085 8086count_match(common); 8087return end; 8088} 8089 8090static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) 8091{ 8092DEFINE_COMPILER; 8093backtrack_common *backtrack; 8094 8095PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); 8096 8097if (*cc == OP_FAIL) 8098 { 8099 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); 8100 return cc + 1; 8101 } 8102 8103if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty) 8104 { 8105 /* No need 
to check notempty conditions. */ 8106 if (common->accept_label == NULL) 8107 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP)); 8108 else 8109 JUMPTO(SLJIT_JUMP, common->accept_label); 8110 return cc + 1; 8111 } 8112 8113if (common->accept_label == NULL) 8114 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0))); 8115else 8116 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label); 8117OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 8118OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); 8119add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0)); 8120OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); 8121if (common->accept_label == NULL) 8122 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0)); 8123else 8124 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label); 8125OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); 8126if (common->accept_label == NULL) 8127 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0)); 8128else 8129 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label); 8130add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); 8131return cc + 1; 8132} 8133 8134static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc) 8135{ 8136DEFINE_COMPILER; 8137int offset = GET2(cc, 1); 8138BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0; 8139 8140/* Data will be discarded anyway... 
*/ 8141if (common->currententry != NULL) 8142 return cc + 1 + IMM2_SIZE; 8143 8144if (!optimized_cbracket) 8145 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset)); 8146offset <<= 1; 8147OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); 8148if (!optimized_cbracket) 8149 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); 8150return cc + 1 + IMM2_SIZE; 8151} 8152 8153static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) 8154{ 8155DEFINE_COMPILER; 8156backtrack_common *backtrack; 8157pcre_uchar opcode = *cc; 8158pcre_uchar *ccend = cc + 1; 8159 8160if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG) 8161 ccend += 2 + cc[1]; 8162 8163PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); 8164 8165if (opcode == OP_SKIP) 8166 { 8167 allocate_stack(common, 1); 8168 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 8169 return ccend; 8170 } 8171 8172if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG) 8173 { 8174 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 8175 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); 8176 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); 8177 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); 8178 } 8179 8180return ccend; 8181} 8182 8183static pcre_uchar then_trap_opcode[1] = { OP_THEN_TRAP }; 8184 8185static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent) 8186{ 8187DEFINE_COMPILER; 8188backtrack_common *backtrack; 8189BOOL needs_control_head; 8190int size; 8191 8192PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc); 8193common->then_trap = BACKTRACK_AS(then_trap_backtrack); 8194BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode; 8195BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start); 
8196BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head); 8197 8198size = BACKTRACK_AS(then_trap_backtrack)->framesize; 8199size = 3 + (size < 0 ? 0 : size); 8200 8201OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 8202allocate_stack(common, size); 8203if (size > 3) 8204 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw)); 8205else 8206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0); 8207OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start); 8208OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap); 8209OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0); 8210 8211size = BACKTRACK_AS(then_trap_backtrack)->framesize; 8212if (size >= 0) 8213 init_frame(common, cc, ccend, size - 1, 0, FALSE); 8214} 8215 8216static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent) 8217{ 8218DEFINE_COMPILER; 8219backtrack_common *backtrack; 8220BOOL has_then_trap = FALSE; 8221then_trap_backtrack *save_then_trap = NULL; 8222 8223SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS)); 8224 8225if (common->has_then && common->then_offsets[cc - common->start] != 0) 8226 { 8227 SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0); 8228 has_then_trap = TRUE; 8229 save_then_trap = common->then_trap; 8230 /* Tail item on backtrack. 
*/ 8231 compile_then_trap_matchingpath(common, cc, ccend, parent); 8232 } 8233 8234while (cc < ccend) 8235 { 8236 switch(*cc) 8237 { 8238 case OP_SOD: 8239 case OP_SOM: 8240 case OP_NOT_WORD_BOUNDARY: 8241 case OP_WORD_BOUNDARY: 8242 case OP_NOT_DIGIT: 8243 case OP_DIGIT: 8244 case OP_NOT_WHITESPACE: 8245 case OP_WHITESPACE: 8246 case OP_NOT_WORDCHAR: 8247 case OP_WORDCHAR: 8248 case OP_ANY: 8249 case OP_ALLANY: 8250 case OP_ANYBYTE: 8251 case OP_NOTPROP: 8252 case OP_PROP: 8253 case OP_ANYNL: 8254 case OP_NOT_HSPACE: 8255 case OP_HSPACE: 8256 case OP_NOT_VSPACE: 8257 case OP_VSPACE: 8258 case OP_EXTUNI: 8259 case OP_EODN: 8260 case OP_EOD: 8261 case OP_CIRC: 8262 case OP_CIRCM: 8263 case OP_DOLL: 8264 case OP_DOLLM: 8265 case OP_NOT: 8266 case OP_NOTI: 8267 case OP_REVERSE: 8268 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); 8269 break; 8270 8271 case OP_SET_SOM: 8272 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); 8273 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); 8274 allocate_stack(common, 1); 8275 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); 8276 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); 8277 cc++; 8278 break; 8279 8280 case OP_CHAR: 8281 case OP_CHARI: 8282 if (common->mode == JIT_COMPILE) 8283 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); 8284 else 8285 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? 
&parent->top->nextbacktracks : &parent->topbacktracks); 8286 break; 8287 8288 case OP_STAR: 8289 case OP_MINSTAR: 8290 case OP_PLUS: 8291 case OP_MINPLUS: 8292 case OP_QUERY: 8293 case OP_MINQUERY: 8294 case OP_UPTO: 8295 case OP_MINUPTO: 8296 case OP_EXACT: 8297 case OP_POSSTAR: 8298 case OP_POSPLUS: 8299 case OP_POSQUERY: 8300 case OP_POSUPTO: 8301 case OP_STARI: 8302 case OP_MINSTARI: 8303 case OP_PLUSI: 8304 case OP_MINPLUSI: 8305 case OP_QUERYI: 8306 case OP_MINQUERYI: 8307 case OP_UPTOI: 8308 case OP_MINUPTOI: 8309 case OP_EXACTI: 8310 case OP_POSSTARI: 8311 case OP_POSPLUSI: 8312 case OP_POSQUERYI: 8313 case OP_POSUPTOI: 8314 case OP_NOTSTAR: 8315 case OP_NOTMINSTAR: 8316 case OP_NOTPLUS: 8317 case OP_NOTMINPLUS: 8318 case OP_NOTQUERY: 8319 case OP_NOTMINQUERY: 8320 case OP_NOTUPTO: 8321 case OP_NOTMINUPTO: 8322 case OP_NOTEXACT: 8323 case OP_NOTPOSSTAR: 8324 case OP_NOTPOSPLUS: 8325 case OP_NOTPOSQUERY: 8326 case OP_NOTPOSUPTO: 8327 case OP_NOTSTARI: 8328 case OP_NOTMINSTARI: 8329 case OP_NOTPLUSI: 8330 case OP_NOTMINPLUSI: 8331 case OP_NOTQUERYI: 8332 case OP_NOTMINQUERYI: 8333 case OP_NOTUPTOI: 8334 case OP_NOTMINUPTOI: 8335 case OP_NOTEXACTI: 8336 case OP_NOTPOSSTARI: 8337 case OP_NOTPOSPLUSI: 8338 case OP_NOTPOSQUERYI: 8339 case OP_NOTPOSUPTOI: 8340 case OP_TYPESTAR: 8341 case OP_TYPEMINSTAR: 8342 case OP_TYPEPLUS: 8343 case OP_TYPEMINPLUS: 8344 case OP_TYPEQUERY: 8345 case OP_TYPEMINQUERY: 8346 case OP_TYPEUPTO: 8347 case OP_TYPEMINUPTO: 8348 case OP_TYPEEXACT: 8349 case OP_TYPEPOSSTAR: 8350 case OP_TYPEPOSPLUS: 8351 case OP_TYPEPOSQUERY: 8352 case OP_TYPEPOSUPTO: 8353 cc = compile_iterator_matchingpath(common, cc, parent); 8354 break; 8355 8356 case OP_CLASS: 8357 case OP_NCLASS: 8358 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE) 8359 cc = compile_iterator_matchingpath(common, cc, parent); 8360 else 8361 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? 
&parent->top->nextbacktracks : &parent->topbacktracks); 8362 break; 8363 8364#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 8365 case OP_XCLASS: 8366 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE) 8367 cc = compile_iterator_matchingpath(common, cc, parent); 8368 else 8369 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); 8370 break; 8371#endif 8372 8373 case OP_REF: 8374 case OP_REFI: 8375 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE) 8376 cc = compile_ref_iterator_matchingpath(common, cc, parent); 8377 else 8378 { 8379 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); 8380 cc += 1 + IMM2_SIZE; 8381 } 8382 break; 8383 8384 case OP_DNREF: 8385 case OP_DNREFI: 8386 if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE) 8387 cc = compile_ref_iterator_matchingpath(common, cc, parent); 8388 else 8389 { 8390 compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); 8391 compile_ref_matchingpath(common, cc, parent->top != NULL ? 
&parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); 8392 cc += 1 + 2 * IMM2_SIZE; 8393 } 8394 break; 8395 8396 case OP_RECURSE: 8397 cc = compile_recurse_matchingpath(common, cc, parent); 8398 break; 8399 8400 case OP_CALLOUT: 8401 cc = compile_callout_matchingpath(common, cc, parent); 8402 break; 8403 8404 case OP_ASSERT: 8405 case OP_ASSERT_NOT: 8406 case OP_ASSERTBACK: 8407 case OP_ASSERTBACK_NOT: 8408 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc); 8409 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE); 8410 break; 8411 8412 case OP_BRAMINZERO: 8413 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc); 8414 cc = bracketend(cc + 1); 8415 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN) 8416 { 8417 allocate_stack(common, 1); 8418 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); 8419 } 8420 else 8421 { 8422 allocate_stack(common, 2); 8423 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 8424 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0); 8425 } 8426 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL(); 8427 if (cc[1] > OP_ASSERTBACK_NOT) 8428 count_match(common); 8429 break; 8430 8431 case OP_ONCE: 8432 case OP_ONCE_NC: 8433 case OP_BRA: 8434 case OP_CBRA: 8435 case OP_COND: 8436 case OP_SBRA: 8437 case OP_SCBRA: 8438 case OP_SCOND: 8439 cc = compile_bracket_matchingpath(common, cc, parent); 8440 break; 8441 8442 case OP_BRAZERO: 8443 if (cc[1] > OP_ASSERTBACK_NOT) 8444 cc = compile_bracket_matchingpath(common, cc, parent); 8445 else 8446 { 8447 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc); 8448 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE); 8449 } 8450 break; 8451 8452 case OP_BRAPOS: 8453 case OP_CBRAPOS: 8454 case OP_SBRAPOS: 8455 case OP_SCBRAPOS: 8456 case OP_BRAPOSZERO: 8457 cc = compile_bracketpos_matchingpath(common, cc, parent); 8458 break; 8459 8460 case OP_MARK: 8461 
PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc); 8462 SLJIT_ASSERT(common->mark_ptr != 0); 8463 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); 8464 allocate_stack(common, common->has_skip_arg ? 5 : 1); 8465 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 8466 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0); 8467 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); 8468 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); 8469 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); 8470 if (common->has_skip_arg) 8471 { 8472 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); 8473 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0); 8474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark); 8475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2)); 8476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0); 8477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0); 8478 } 8479 cc += 1 + 2 + cc[1]; 8480 break; 8481 8482 case OP_PRUNE: 8483 case OP_PRUNE_ARG: 8484 case OP_SKIP: 8485 case OP_SKIP_ARG: 8486 case OP_THEN: 8487 case OP_THEN_ARG: 8488 case OP_COMMIT: 8489 cc = compile_control_verb_matchingpath(common, cc, parent); 8490 break; 8491 8492 case OP_FAIL: 8493 case OP_ACCEPT: 8494 case OP_ASSERT_ACCEPT: 8495 cc = compile_fail_accept_matchingpath(common, cc, parent); 8496 break; 8497 8498 case OP_CLOSE: 8499 cc = compile_close_matchingpath(common, cc); 8500 break; 8501 8502 case OP_SKIPZERO: 8503 cc = bracketend(cc + 1); 8504 break; 8505 8506 default: 8507 SLJIT_ASSERT_STOP(); 8508 return; 8509 } 8510 if (cc == NULL) 8511 return; 8512 } 8513 8514if (has_then_trap) 8515 { 8516 /* Head item on backtrack. 
*/ 8517 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc); 8518 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode; 8519 BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap; 8520 common->then_trap = save_then_trap; 8521 } 8522SLJIT_ASSERT(cc == ccend); 8523} 8524 8525#undef PUSH_BACKTRACK 8526#undef PUSH_BACKTRACK_NOVALUE 8527#undef BACKTRACK_AS 8528 8529#define COMPILE_BACKTRACKINGPATH(current) \ 8530 do \ 8531 { \ 8532 compile_backtrackingpath(common, (current)); \ 8533 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ 8534 return; \ 8535 } \ 8536 while (0) 8537 8538#define CURRENT_AS(type) ((type *)current) 8539 8540static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) 8541{ 8542DEFINE_COMPILER; 8543pcre_uchar *cc = current->cc; 8544pcre_uchar opcode; 8545pcre_uchar type; 8546int max = -1, min = -1; 8547struct sljit_label *label = NULL; 8548struct sljit_jump *jump = NULL; 8549jump_list *jumplist = NULL; 8550int private_data_ptr = PRIVATE_DATA(cc); 8551int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); 8552int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; 8553int offset1 = (private_data_ptr == 0) ? 
STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); 8554 8555cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL); 8556 8557switch(opcode) 8558 { 8559 case OP_STAR: 8560 case OP_PLUS: 8561 case OP_UPTO: 8562 case OP_CRRANGE: 8563 if (type == OP_ANYNL || type == OP_EXTUNI) 8564 { 8565 SLJIT_ASSERT(private_data_ptr == 0); 8566 set_jumps(current->topbacktracks, LABEL()); 8567 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8568 free_stack(common, 1); 8569 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); 8570 } 8571 else 8572 { 8573 if (opcode == OP_UPTO) 8574 min = 0; 8575 if (opcode <= OP_PLUS) 8576 { 8577 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 8578 jump = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, base, offset1); 8579 } 8580 else 8581 { 8582 OP1(SLJIT_MOV, TMP1, 0, base, offset1); 8583 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 8584 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1); 8585 OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1); 8586 } 8587 skip_char_back(common); 8588 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); 8589 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); 8590 if (opcode == OP_CRRANGE) 8591 set_jumps(current->topbacktracks, LABEL()); 8592 JUMPHERE(jump); 8593 if (private_data_ptr == 0) 8594 free_stack(common, 2); 8595 if (opcode == OP_PLUS) 8596 set_jumps(current->topbacktracks, LABEL()); 8597 } 8598 break; 8599 8600 case OP_MINSTAR: 8601 case OP_MINPLUS: 8602 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 8603 compile_char1_matchingpath(common, type, cc, &jumplist); 8604 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); 8605 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); 8606 set_jumps(jumplist, LABEL()); 8607 if (private_data_ptr == 0) 8608 free_stack(common, 1); 8609 if (opcode == OP_MINPLUS) 8610 set_jumps(current->topbacktracks, LABEL()); 8611 break; 8612 8613 case OP_MINUPTO: 8614 case OP_CRMINRANGE: 8615 if (opcode == 
OP_CRMINRANGE) 8616 { 8617 label = LABEL(); 8618 set_jumps(current->topbacktracks, label); 8619 } 8620 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 8621 compile_char1_matchingpath(common, type, cc, &jumplist); 8622 8623 OP1(SLJIT_MOV, TMP1, 0, base, offset1); 8624 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); 8625 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); 8626 OP1(SLJIT_MOV, base, offset1, TMP1, 0); 8627 8628 if (opcode == OP_CRMINRANGE) 8629 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min + 1, label); 8630 8631 if (opcode == OP_CRMINRANGE && max == 0) 8632 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); 8633 else 8634 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath); 8635 8636 set_jumps(jumplist, LABEL()); 8637 if (private_data_ptr == 0) 8638 free_stack(common, 2); 8639 break; 8640 8641 case OP_QUERY: 8642 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 8643 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); 8644 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); 8645 jump = JUMP(SLJIT_JUMP); 8646 set_jumps(current->topbacktracks, LABEL()); 8647 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 8648 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); 8649 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); 8650 JUMPHERE(jump); 8651 if (private_data_ptr == 0) 8652 free_stack(common, 1); 8653 break; 8654 8655 case OP_MINQUERY: 8656 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); 8657 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0); 8658 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); 8659 compile_char1_matchingpath(common, type, cc, &jumplist); 8660 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath); 8661 set_jumps(jumplist, LABEL()); 8662 JUMPHERE(jump); 8663 if (private_data_ptr == 0) 8664 free_stack(common, 1); 8665 break; 8666 8667 case OP_EXACT: 8668 case OP_POSPLUS: 8669 case OP_CRPOSRANGE: 8670 set_jumps(current->topbacktracks, LABEL()); 8671 break; 8672 
8673 case OP_POSSTAR: 8674 case OP_POSQUERY: 8675 case OP_POSUPTO: 8676 break; 8677 8678 default: 8679 SLJIT_ASSERT_STOP(); 8680 break; 8681 } 8682} 8683 8684static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) 8685{ 8686DEFINE_COMPILER; 8687pcre_uchar *cc = current->cc; 8688BOOL ref = (*cc == OP_REF || *cc == OP_REFI); 8689pcre_uchar type; 8690 8691type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE]; 8692 8693if ((type & 0x1) == 0) 8694 { 8695 /* Maximize case. */ 8696 set_jumps(current->topbacktracks, LABEL()); 8697 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8698 free_stack(common, 1); 8699 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); 8700 return; 8701 } 8702 8703OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8704CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath); 8705set_jumps(current->topbacktracks, LABEL()); 8706free_stack(common, ref ? 
2 : 3); 8707} 8708 8709static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current) 8710{ 8711DEFINE_COMPILER; 8712 8713if (CURRENT_AS(recurse_backtrack)->inlined_pattern) 8714 compile_backtrackingpath(common, current->top); 8715set_jumps(current->topbacktracks, LABEL()); 8716if (CURRENT_AS(recurse_backtrack)->inlined_pattern) 8717 return; 8718 8719if (common->has_set_som && common->mark_ptr != 0) 8720 { 8721 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8722 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 8723 free_stack(common, 2); 8724 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0); 8725 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0); 8726 } 8727else if (common->has_set_som || common->mark_ptr != 0) 8728 { 8729 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8730 free_stack(common, 1); 8731 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr, TMP2, 0); 8732 } 8733} 8734 8735static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current) 8736{ 8737DEFINE_COMPILER; 8738pcre_uchar *cc = current->cc; 8739pcre_uchar bra = OP_BRA; 8740struct sljit_jump *brajump = NULL; 8741 8742SLJIT_ASSERT(*cc != OP_BRAMINZERO); 8743if (*cc == OP_BRAZERO) 8744 { 8745 bra = *cc; 8746 cc++; 8747 } 8748 8749if (bra == OP_BRAZERO) 8750 { 8751 SLJIT_ASSERT(current->topbacktracks == NULL); 8752 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8753 } 8754 8755if (CURRENT_AS(assert_backtrack)->framesize < 0) 8756 { 8757 set_jumps(current->topbacktracks, LABEL()); 8758 8759 if (bra == OP_BRAZERO) 8760 { 8761 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 8762 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); 8763 free_stack(common, 1); 8764 } 8765 return; 8766 } 8767 8768if (bra == OP_BRAZERO) 8769 { 8770 if (*cc == 
OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT) 8771 { 8772 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 8773 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath); 8774 free_stack(common, 1); 8775 return; 8776 } 8777 free_stack(common, 1); 8778 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); 8779 } 8780 8781if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK) 8782 { 8783 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr); 8784 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 8785 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_backtrack)->framesize * sizeof(sljit_sw)); 8786 8787 set_jumps(current->topbacktracks, LABEL()); 8788 } 8789else 8790 set_jumps(current->topbacktracks, LABEL()); 8791 8792if (bra == OP_BRAZERO) 8793 { 8794 /* We know there is enough place on the stack. */ 8795 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); 8796 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); 8797 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath); 8798 JUMPHERE(brajump); 8799 } 8800} 8801 8802static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current) 8803{ 8804DEFINE_COMPILER; 8805int opcode, stacksize, alt_count, alt_max; 8806int offset = 0; 8807int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr; 8808int repeat_ptr = 0, repeat_type = 0, repeat_count = 0; 8809pcre_uchar *cc = current->cc; 8810pcre_uchar *ccbegin; 8811pcre_uchar *ccprev; 8812pcre_uchar bra = OP_BRA; 8813pcre_uchar ket; 8814assert_backtrack *assert; 8815sljit_uw *next_update_addr = NULL; 8816BOOL has_alternatives; 8817BOOL needs_control_head = FALSE; 8818struct sljit_jump *brazero = NULL; 8819struct sljit_jump *alt1 = NULL; 8820struct sljit_jump *alt2 = NULL; 8821struct sljit_jump *once = NULL; 
8822struct sljit_jump *cond = NULL; 8823struct sljit_label *rmin_label = NULL; 8824struct sljit_label *exact_label = NULL; 8825 8826if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) 8827 { 8828 bra = *cc; 8829 cc++; 8830 } 8831 8832opcode = *cc; 8833ccbegin = bracketend(cc) - 1 - LINK_SIZE; 8834ket = *ccbegin; 8835if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0) 8836 { 8837 repeat_ptr = PRIVATE_DATA(ccbegin); 8838 repeat_type = PRIVATE_DATA(ccbegin + 2); 8839 repeat_count = PRIVATE_DATA(ccbegin + 3); 8840 SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0); 8841 if (repeat_type == OP_UPTO) 8842 ket = OP_KETRMAX; 8843 if (repeat_type == OP_MINUPTO) 8844 ket = OP_KETRMIN; 8845 } 8846ccbegin = cc; 8847cc += GET(cc, 1); 8848has_alternatives = *cc == OP_ALT; 8849if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) 8850 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL; 8851if (opcode == OP_CBRA || opcode == OP_SCBRA) 8852 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1; 8853if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) 8854 opcode = OP_SCOND; 8855if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC)) 8856 opcode = OP_ONCE; 8857 8858alt_max = has_alternatives ? no_alternatives(ccbegin) : 0; 8859 8860/* Decoding the needs_control_head in framesize. */ 8861if (opcode == OP_ONCE) 8862 { 8863 needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0; 8864 CURRENT_AS(bracket_backtrack)->u.framesize >>= 1; 8865 } 8866 8867if (ket != OP_KET && repeat_type != 0) 8868 { 8869 /* TMP1 is used in OP_KETRMIN below. 
*/ 8870 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8871 free_stack(common, 1); 8872 if (repeat_type == OP_UPTO) 8873 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1); 8874 else 8875 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0); 8876 } 8877 8878if (ket == OP_KETRMAX) 8879 { 8880 if (bra == OP_BRAZERO) 8881 { 8882 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8883 free_stack(common, 1); 8884 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0); 8885 } 8886 } 8887else if (ket == OP_KETRMIN) 8888 { 8889 if (bra != OP_BRAMINZERO) 8890 { 8891 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8892 if (repeat_type != 0) 8893 { 8894 /* TMP1 was set a few lines above. */ 8895 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); 8896 /* Drop STR_PTR for non-greedy plus quantifier. */ 8897 if (opcode != OP_ONCE) 8898 free_stack(common, 1); 8899 } 8900 else if (opcode >= OP_SBRA || opcode == OP_ONCE) 8901 { 8902 /* Checking zero-length iteration. */ 8903 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0) 8904 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); 8905 else 8906 { 8907 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 8908 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw), CURRENT_AS(bracket_backtrack)->recursive_matchingpath); 8909 } 8910 /* Drop STR_PTR for non-greedy plus quantifier. 
*/ 8911 if (opcode != OP_ONCE) 8912 free_stack(common, 1); 8913 } 8914 else 8915 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); 8916 } 8917 rmin_label = LABEL(); 8918 if (repeat_type != 0) 8919 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); 8920 } 8921else if (bra == OP_BRAZERO) 8922 { 8923 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8924 free_stack(common, 1); 8925 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); 8926 } 8927else if (repeat_type == OP_EXACT) 8928 { 8929 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); 8930 exact_label = LABEL(); 8931 } 8932 8933if (offset != 0) 8934 { 8935 if (common->capture_last_ptr != 0) 8936 { 8937 SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0); 8938 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8939 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 8940 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0); 8941 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); 8942 free_stack(common, 3); 8943 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0); 8944 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0); 8945 } 8946 else if (common->optimized_cbracket[offset >> 1] == 0) 8947 { 8948 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8949 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 8950 free_stack(common, 2); 8951 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); 8952 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); 8953 } 8954 } 8955 8956if (SLJIT_UNLIKELY(opcode == OP_ONCE)) 8957 { 8958 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) 8959 { 8960 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 8961 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 8962 } 8963 once = JUMP(SLJIT_JUMP); 8964 } 8965else if (SLJIT_UNLIKELY(opcode == OP_COND) 
|| SLJIT_UNLIKELY(opcode == OP_SCOND)) 8966 { 8967 if (has_alternatives) 8968 { 8969 /* Always exactly one alternative. */ 8970 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8971 free_stack(common, 1); 8972 8973 alt_max = 2; 8974 alt1 = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw)); 8975 } 8976 } 8977else if (has_alternatives) 8978 { 8979 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 8980 free_stack(common, 1); 8981 8982 if (alt_max > 4) 8983 { 8984 /* Table jump if alt_max is greater than 4. */ 8985 next_update_addr = common->read_only_data_ptr; 8986 common->read_only_data_ptr += alt_max; 8987 sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr); 8988 add_label_addr(common, next_update_addr++); 8989 } 8990 else 8991 { 8992 if (alt_max == 4) 8993 alt2 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); 8994 alt1 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw)); 8995 } 8996 } 8997 8998COMPILE_BACKTRACKINGPATH(current->top); 8999if (current->topbacktracks) 9000 set_jumps(current->topbacktracks, LABEL()); 9001 9002if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) 9003 { 9004 /* Conditional block always has at most one alternative. 
*/ 9005 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) 9006 { 9007 SLJIT_ASSERT(has_alternatives); 9008 assert = CURRENT_AS(bracket_backtrack)->u.assert; 9009 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK)) 9010 { 9011 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); 9012 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 9013 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw)); 9014 } 9015 cond = JUMP(SLJIT_JUMP); 9016 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL()); 9017 } 9018 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL) 9019 { 9020 SLJIT_ASSERT(has_alternatives); 9021 cond = JUMP(SLJIT_JUMP); 9022 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL()); 9023 } 9024 else 9025 SLJIT_ASSERT(!has_alternatives); 9026 } 9027 9028if (has_alternatives) 9029 { 9030 alt_count = sizeof(sljit_uw); 9031 do 9032 { 9033 current->top = NULL; 9034 current->topbacktracks = NULL; 9035 current->nextbacktracks = NULL; 9036 /* Conditional blocks always have an additional alternative, even if it is empty. */ 9037 if (*cc == OP_ALT) 9038 { 9039 ccprev = cc + 1 + LINK_SIZE; 9040 cc += GET(cc, 1); 9041 if (opcode != OP_COND && opcode != OP_SCOND) 9042 { 9043 if (opcode != OP_ONCE) 9044 { 9045 if (private_data_ptr != 0) 9046 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); 9047 else 9048 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 9049 } 9050 else 9051 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0)); 9052 } 9053 compile_matchingpath(common, ccprev, cc, current); 9054 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 9055 return; 9056 } 9057 9058 /* Instructions after the current alternative is successfully matched. 
*/ 9059 /* There is a similar code in compile_bracket_matchingpath. */ 9060 if (opcode == OP_ONCE) 9061 match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); 9062 9063 stacksize = 0; 9064 if (repeat_type == OP_MINUPTO) 9065 { 9066 /* We need to preserve the counter. TMP2 will be used below. */ 9067 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); 9068 stacksize++; 9069 } 9070 if (ket != OP_KET || bra != OP_BRA) 9071 stacksize++; 9072 if (offset != 0) 9073 { 9074 if (common->capture_last_ptr != 0) 9075 stacksize++; 9076 if (common->optimized_cbracket[offset >> 1] == 0) 9077 stacksize += 2; 9078 } 9079 if (opcode != OP_ONCE) 9080 stacksize++; 9081 9082 if (stacksize > 0) 9083 allocate_stack(common, stacksize); 9084 9085 stacksize = 0; 9086 if (repeat_type == OP_MINUPTO) 9087 { 9088 /* TMP2 was set above. */ 9089 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1); 9090 stacksize++; 9091 } 9092 9093 if (ket != OP_KET || bra != OP_BRA) 9094 { 9095 if (ket != OP_KET) 9096 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); 9097 else 9098 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); 9099 stacksize++; 9100 } 9101 9102 if (offset != 0) 9103 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); 9104 9105 if (opcode != OP_ONCE) 9106 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count); 9107 9108 if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0) 9109 { 9110 /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. 
*/ 9111 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); 9112 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); 9113 } 9114 9115 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath); 9116 9117 if (opcode != OP_ONCE) 9118 { 9119 if (alt_max > 4) 9120 add_label_addr(common, next_update_addr++); 9121 else 9122 { 9123 if (alt_count != 2 * sizeof(sljit_uw)) 9124 { 9125 JUMPHERE(alt1); 9126 if (alt_max == 3 && alt_count == sizeof(sljit_uw)) 9127 alt2 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); 9128 } 9129 else 9130 { 9131 JUMPHERE(alt2); 9132 if (alt_max == 4) 9133 alt1 = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw)); 9134 } 9135 } 9136 alt_count += sizeof(sljit_uw); 9137 } 9138 9139 COMPILE_BACKTRACKINGPATH(current->top); 9140 if (current->topbacktracks) 9141 set_jumps(current->topbacktracks, LABEL()); 9142 SLJIT_ASSERT(!current->nextbacktracks); 9143 } 9144 while (*cc == OP_ALT); 9145 9146 if (cond != NULL) 9147 { 9148 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND); 9149 assert = CURRENT_AS(bracket_backtrack)->u.assert; 9150 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0) 9151 { 9152 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); 9153 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); 9154 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_sw)); 9155 } 9156 JUMPHERE(cond); 9157 } 9158 9159 /* Free the STR_PTR. */ 9160 if (private_data_ptr == 0) 9161 free_stack(common, 1); 9162 } 9163 9164if (offset != 0) 9165 { 9166 /* Using both tmp register is better for instruction scheduling. 
*/ 9167 if (common->optimized_cbracket[offset >> 1] != 0) 9168 { 9169 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 9170 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 9171 free_stack(common, 2); 9172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); 9173 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); 9174 } 9175 else 9176 { 9177 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 9178 free_stack(common, 1); 9179 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); 9180 } 9181 } 9182else if (opcode == OP_SBRA || opcode == OP_SCOND) 9183 { 9184 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0)); 9185 free_stack(common, 1); 9186 } 9187else if (opcode == OP_ONCE) 9188 { 9189 cc = ccbegin + GET(ccbegin, 1); 9190 stacksize = needs_control_head ? 1 : 0; 9191 9192 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) 9193 { 9194 /* Reset head and drop saved frame. */ 9195 stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1); 9196 } 9197 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN)) 9198 { 9199 /* The STR_PTR must be released. */ 9200 stacksize++; 9201 } 9202 free_stack(common, stacksize); 9203 9204 JUMPHERE(once); 9205 /* Restore previous private_data_ptr */ 9206 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) 9207 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_backtrack)->u.framesize * sizeof(sljit_sw)); 9208 else if (ket == OP_KETRMIN) 9209 { 9210 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 9211 /* See the comment below. 
*/ 9212 free_stack(common, 2); 9213 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); 9214 } 9215 } 9216 9217if (repeat_type == OP_EXACT) 9218 { 9219 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); 9220 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0); 9221 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label); 9222 } 9223else if (ket == OP_KETRMAX) 9224 { 9225 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 9226 if (bra != OP_BRAZERO) 9227 free_stack(common, 1); 9228 9229 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); 9230 if (bra == OP_BRAZERO) 9231 { 9232 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); 9233 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath); 9234 JUMPHERE(brazero); 9235 free_stack(common, 1); 9236 } 9237 } 9238else if (ket == OP_KETRMIN) 9239 { 9240 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); 9241 9242 /* OP_ONCE removes everything in case of a backtrack, so we don't 9243 need to explicitly release the STR_PTR. The extra release would 9244 affect badly the free_stack(2) above. */ 9245 if (opcode != OP_ONCE) 9246 free_stack(common, 1); 9247 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label); 9248 if (opcode == OP_ONCE) 9249 free_stack(common, bra == OP_BRAMINZERO ? 
2 : 1);
  else if (bra == OP_BRAMINZERO)
    free_stack(common, 1);
  }
else if (bra == OP_BRAZERO)
  {
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
  JUMPHERE(brazero);
  }
}

/* Generates the backtracking path of the bracket opcodes that
compile_backtrackingpath routes here (OP_BRAPOS, OP_CBRAPOS, OP_SBRAPOS,
OP_SCBRAPOS and OP_BRAPOSZERO).

If framesize < 0: for the capturing forms the values found at STACK(0) and
STACK(1) are written back to OVECTOR(offset) and OVECTOR(offset + 1), and
STACK(2) is written back to capture_last_ptr when that slot is in use. The
pending topbacktracks jumps are then bound here and stacksize words are freed.

Otherwise: STACK_TOP is reloaded from the private data slot, a fast call is
queued on common->revertframes, and the private data slot is finally reloaded
from the word at offset framesize * sizeof(sljit_sw). */
static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int offset;
struct sljit_jump *jump;

if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
  {
  if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
    {
    /* Twice the 2-byte value read after the link field; used to index OVECTOR.
    Presumably this is the capture group number - confirm against the opcode layout. */
    offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    /* TMP1 has been stored already, so it is reused for the third stack word. */
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    }
  set_jumps(current->topbacktracks, LABEL());
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
  return;
  }

OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));

if (current->topbacktracks)
  {
  /* The fall-through path jumps over the free_stack below; only the
  topbacktracks entry point executes it. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(current->topbacktracks, LABEL());
  /* Drop the stack frame. */
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
  JUMPHERE(jump);
  }
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_backtrack)->framesize * sizeof(sljit_sw));
}

/* Generates the backtracking path of OP_BRAMINZERO. The backtrack lists of
"current" are cleared, then the item that follows the opcode is compiled by
hand: when current->cc[1] is greater than OP_ASSERTBACK_NOT it is handled as
a bracket (matching path followed by its backtracking path), otherwise as an
assertion, using a zeroed temporary assert_backtrack whose matchingpath is
taken from the braminzero backtrack. */
static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
assert_backtrack backtrack;

current->top = NULL;
current->topbacktracks = NULL;
current->nextbacktracks = NULL;
if (current->cc[1] > OP_ASSERTBACK_NOT)
  {
  /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
  compile_bracket_matchingpath(common, current->cc, current);
  compile_bracket_backtrackingpath(common, current->top);
  }
else
  {
  memset(&backtrack, 0, sizeof(backtrack));
  backtrack.common.cc = current->cc;
  backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
  /* Manual call of compile_assert_matchingpath. */
  compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
  }
/* Nothing may be left pending on "current" after the manual compilation. */
SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
}

/* Generates the backtracking path of the control verbs that
compile_backtrackingpath routes here (OP_THEN, OP_THEN_ARG, OP_PRUNE,
OP_PRUNE_ARG, OP_SKIP and OP_SKIP_ARG).

The checks are made in this order:
  1. OP_THEN / OP_THEN_ARG with an active then trap: search for the trap
     record and jump to the trap's quit list.
  2. OP_THEN / OP_THEN_ARG inside a positive assertion: jump to
     positive_assert_quit.
  3. local_exit is set: jump to the quit label (or queue on the quit list).
  4. OP_SKIP_ARG: call do_search_mark and conditionally jump to reset_match.
  5. Everything else: set STR_PTR and jump to reset_match. */
static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar opcode = *current->cc;
struct sljit_label *loop;
struct sljit_jump *jump;

if (opcode == OP_THEN || opcode == OP_THEN_ARG)
  {
  if (common->then_trap != NULL)
    {
    SLJIT_ASSERT(common->control_head_ptr != 0);

    /* Start from the value stored at control_head_ptr and follow the link
    found one word below STACK_TOP until the words two and three below it
    equal type_then_trap and the start of the current then trap. The first
    iteration enters at the comparisons, skipping the link load. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
    jump = JUMP(SLJIT_JUMP);

    loop = LABEL();
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), -(int)sizeof(sljit_sw));
    JUMPHERE(jump);
    CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(2 * sizeof(sljit_sw)), TMP1, 0, loop);
    CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), -(int)(3 * sizeof(sljit_sw)), TMP2, 0, loop);
    add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
    return;
    }
  else if (common->positive_assert)
    {
    add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
    return;
    }
  }

if (common->local_exit)
  {
  /* Jump directly when the quit label already exists, otherwise queue the
  jump on the quit list. */
  if (common->quit_label == NULL)
    add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  else
    JUMPTO(SLJIT_JUMP, common->quit_label);
  return;
  }

if (opcode == OP_SKIP_ARG)
  {
  SLJIT_ASSERT(common->control_head_ptr != 0);
  /* STACK_TOP is parked in LOCALS0 around the call because the register is
  loaded with current->cc + 2 for do_search_mark. NOTE(review): TMP1 and
  STACK_TOP presumably alias the two argument registers of SLJIT_CALL2, and
  TMP1 the return register - confirm against the register defines. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
  sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  /* Any value other than -1 goes to reset_match with STR_PTR holding it;
  -1 falls through to whatever code is generated next. */
  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
  add_jump(compiler, &common->reset_match, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, -1));
  return;
  }

/* OP_SKIP takes STR_PTR from STACK(0); the remaining verbs pass 0. */
if (opcode == OP_SKIP)
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
else
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
}

/* Generates the backtracking path of the virtual OP_THEN_TRAP opcode.

When the backtrack record carries a then_trap pointer, no code is generated:
the pointer is only copied into common->then_trap. Otherwise two entry points
are generated, both of which end by storing TMP1 into control_head_ptr:
  - the ordinary backtrack entry, which loads TMP1 from STACK(size - 3) and
    frees size words (size is 3 plus the frame size, if there is a frame);
  - the target of the record's quit list, which optionally queues a fast
    call on common->revertframes, loads TMP1 from STACK(0) and frees 3 words. */
static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
int size;

if (CURRENT_AS(then_trap_backtrack)->then_trap)
  {
  common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
  return;
  }

size = CURRENT_AS(then_trap_backtrack)->framesize;
size = 3 + (size < 0 ? 0 : size);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
free_stack(common, size);
jump = JUMP(SLJIT_JUMP);

set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
/* STACK_TOP is set by THEN. */
if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 3);

JUMPHERE(jump);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}

/* Generates the backtracking paths of a whole chain of backtrack records.

Starting at "current" and following the prev links, each record first gets
its pending nextbacktracks jumps bound to the current position, then the
opcode at current->cc selects the generator for that record. The value of
common->then_trap is saved on entry and restored on exit, so changes made
while processing the chain do not leak to the caller. */
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *save_then_trap = common->then_trap;

while (current)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());
  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Write the word at STACK(0) back to OVECTOR(0) and release it. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* All single item repeat opcodes and the character classes share one generator. */
    case OP_STAR:
    case OP_MINSTAR:
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_UPTO:
    case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:
    case OP_STARI:
    case OP_MINSTARI:
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_UPTOI:
    case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:
    case OP_NOTSTAR:
    case OP_NOTMINSTAR:
    case OP_NOTPLUS:
    case OP_NOTMINPLUS:
    case OP_NOTQUERY:
    case OP_NOTMINQUERY:
    case OP_NOTUPTO:
    case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:
    case OP_NOTSTARI:
    case OP_NOTMINSTARI:
    case OP_NOTPLUSI:
    case OP_NOTMINPLUSI:
    case OP_NOTQUERYI:
    case OP_NOTMINQUERYI:
    case OP_NOTUPTOI:
    case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:
    case OP_TYPESTAR:
    case OP_TYPEMINSTAR:
    case OP_TYPEPLUS:
    case OP_TYPEMINPLUS:
    case OP_TYPEQUERY:
    case OP_TYPEMINQUERY:
    case OP_TYPEUPTO:
    case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:
    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF:
    case OP_REFI:
    case OP_DNREF:
    case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_CBRA:
    case OP_COND:
    case OP_SBRA:
    case OP_SCBRA:
    case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after OP_BRAZERO decides between the bracket and the
    assertion generator. */
    if (current->cc[1] > OP_ASSERTBACK_NOT)
      compile_bracket_backtrackingpath(common, current);
    else
      compile_assert_backtrackingpath(common, current);
    break;

    case OP_BRAPOS:
    case OP_CBRAPOS:
    case OP_SBRAPOS:
    case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    /* Without has_skip_arg one word is popped and written to mark_ptr. With
    has_skip_arg five words are popped: STACK(4) goes to mark_ptr and
    STACK(0) goes to control_head_ptr. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
    if (common->has_skip_arg)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, common->has_skip_arg ? 5 : 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
    if (common->has_skip_arg)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
    break;

    case OP_THEN:
    case OP_THEN_ARG:
    case OP_PRUNE:
    case OP_PRUNE_ARG:
    case OP_SKIP:
    case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    /* The return register is set to PCRE_ERROR_NOMATCH unless local_exit is
    set; the jump to the quit point is generated in both cases. */
    if (!common->local_exit)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    if (common->quit_label == NULL)
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    else
      JUMPTO(SLJIT_JUMP, common->quit_label);
    break;

    case OP_CALLOUT:
    case OP_FAIL:
    case OP_ACCEPT:
    case OP_ASSERT_ACCEPT:
    /* No code of their own: only the pending jumps are bound here. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_ASSERT_STOP();
    break;
    }
  current = current->prev;
  }
common->then_trap = save_then_trap;
}

/* Generates the code of the recursion entry described by
common->currententry. The bracket at common->start + currententry->start must
be one of OP_BRA, OP_CBRA, OP_CBRAPOS, OP_SCBRA or OP_SCBRAPOS.

Outline of the generated code:
  - the entry label is created and all recorded calls are bound to it;
  - stack space is allocated for the private data copy, the frame (if
    get_framesize reports one) and one extra word when the bracket has
    alternatives; the value received by the fast enter is kept in the topmost
    of these words;
  - every alternative is compiled (matching path, jump to the accept list,
    backtracking path);
  - TMP3 is set to 0 on the "none matched" and quit paths and to 1 on the
    accept path, and is copied to TMP1 just before the fast return.

Returns early, leaving the code unfinished, as soon as the compiler reports
an error. */
static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
pcre_uchar *cc = common->start + common->currententry->start;
pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
pcre_uchar *ccend = bracketend(cc);
BOOL needs_control_head;
int framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
int private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);
int alternativesize;
BOOL needs_frame;
backtrack_common altbacktrack;
struct sljit_jump *jump;

/* Recurse captures then. */
common->then_trap = NULL;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
/* A negative framesize means no frame; it is normalized to 0 so that it can
be used in the size computations below. */
needs_frame = framesize >= 0;
if (!needs_frame)
  framesize = 0;
/* One extra stack word (it receives STR_PTR below) when there are alternatives. */
alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;

SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
common->currententry->entry = LABEL();
set_jumps(common->currententry->calls, common->currententry->entry);

sljit_emit_fast_enter(compiler, TMP2, 0);
allocate_stack(common, private_data_size + framesize + alternativesize);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(private_data_size + framesize + alternativesize - 1), TMP2, 0);
copy_private_data(common, ccbegin, ccend, TRUE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
if (needs_frame)
  init_frame(common, cc, NULL, framesize + alternativesize - 1, alternativesize, TRUE);

if (alternativesize > 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

memset(&altbacktrack, 0, sizeof(backtrack_common));
/* The quit and accept targets are reset for this recursion body. */
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
altbacktrack.cc = ccbegin;
cc += GET(cc, 1);
/* One iteration per alternative; cc always points to the OP_ALT (or closing
opcode) that ends the alternative being compiled. */
while (1)
  {
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Every alternative except the first reloads STR_PTR from STACK(0). */
  if (altbacktrack.cc != ccbegin)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  altbacktrack.cc = cc + 1 + LINK_SIZE;
  cc += GET(cc, 1);
  }

/* None of them matched. */
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_JUMP);

if (common->quit != NULL)
  {
  /* Quit path: STACK_TOP is reloaded from recursive_head_ptr and, if there
  is a frame, moved below the frame and alternative words for the fast call
  queued on revertframes, then moved back. The quit list is then restarted
  with a single jump that is bound at the common exit code below. */
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  if (needs_frame)
    {
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
    }
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  common->quit = NULL;
  add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  }

/* Accept path: same stack handling as the quit path, but TMP3 becomes 1. */
set_jumps(common->accept, LABEL());
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
if (needs_frame)
  {
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_sw));
  }
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);

/* Common exit code, reached from all three paths. */
JUMPHERE(jump);
if (common->quit != NULL)
  set_jumps(common->quit, LABEL());
copy_private_data(common, ccbegin, ccend, FALSE, private_data_size + framesize + alternativesize, framesize + alternativesize, needs_control_head);
free_stack(common, private_data_size + framesize + alternativesize);
/* Reload recursive_head_ptr (and control_head_ptr when a control head is
used) from the words above STACK_TOP, and move the result from TMP3 to TMP1. */
if (needs_control_head)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
  }
else
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_sw));
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
  }
sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
}

#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS

void
PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
{
struct sljit_compiler *compiler;
backtrack_common rootbacktrack;
compiler_common common_data;
compiler_common *common = &common_data;
const pcre_uint8 *tables = re->tables;
pcre_study_data *study;
int private_data_size;
pcre_uchar *ccend;
executable_functions *functions;
void *executable_func;
sljit_uw executable_size;
sljit_uw total_length;
label_addr_list *label_addr;
struct sljit_label *mainloop_label = NULL;
struct sljit_label *continue_match_label;
struct sljit_label *empty_match_found_label = NULL;
struct sljit_label *empty_match_backtrack_label = NULL;
struct sljit_label *reset_match_label;
struct sljit_label *quit_label;
struct sljit_jump *jump;
struct sljit_jump *minlength_check_failed = NULL;
struct sljit_jump *reqbyte_notfound = NULL;
struct sljit_jump *empty_match = NULL;

SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
study = extra->study_data;

if (!tables)
  tables = PRIV(default_tables);

memset(&rootbacktrack, 0, sizeof(backtrack_common));
memset(common, 0, sizeof(compiler_common));
rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;

common->start = rootbacktrack.cc;
common->read_only_data = NULL;
9770common->read_only_data_size = 0; 9771common->read_only_data_ptr = NULL; 9772common->fcc = tables + fcc_offset; 9773common->lcc = (sljit_sw)(tables + lcc_offset); 9774common->mode = mode; 9775common->might_be_empty = study->minlength == 0; 9776common->nltype = NLTYPE_FIXED; 9777switch(re->options & PCRE_NEWLINE_BITS) 9778 { 9779 case 0: 9780 /* Compile-time default */ 9781 switch(NEWLINE) 9782 { 9783 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break; 9784 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break; 9785 default: common->newline = NEWLINE; break; 9786 } 9787 break; 9788 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break; 9789 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break; 9790 case PCRE_NEWLINE_CR+ 9791 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break; 9792 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break; 9793 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break; 9794 default: return; 9795 } 9796common->nlmax = READ_CHAR_MAX; 9797common->nlmin = 0; 9798if ((re->options & PCRE_BSR_ANYCRLF) != 0) 9799 common->bsr_nltype = NLTYPE_ANYCRLF; 9800else if ((re->options & PCRE_BSR_UNICODE) != 0) 9801 common->bsr_nltype = NLTYPE_ANY; 9802else 9803 { 9804#ifdef BSR_ANYCRLF 9805 common->bsr_nltype = NLTYPE_ANYCRLF; 9806#else 9807 common->bsr_nltype = NLTYPE_ANY; 9808#endif 9809 } 9810common->bsr_nlmax = READ_CHAR_MAX; 9811common->bsr_nlmin = 0; 9812common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; 9813common->ctypes = (sljit_sw)(tables + ctypes_offset); 9814common->name_table = ((pcre_uchar *)re) + re->name_table_offset; 9815common->name_count = re->name_count; 9816common->name_entry_size = re->name_entry_size; 9817common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; 9818#ifdef SUPPORT_UTF 9819/* PCRE_UTF[16|32] have the 
same value as PCRE_UTF8. */ 9820common->utf = (re->options & PCRE_UTF8) != 0; 9821#ifdef SUPPORT_UCP 9822common->use_ucp = (re->options & PCRE_UCP) != 0; 9823#endif 9824if (common->utf) 9825 { 9826 if (common->nltype == NLTYPE_ANY) 9827 common->nlmax = 0x2029; 9828 else if (common->nltype == NLTYPE_ANYCRLF) 9829 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL; 9830 else 9831 { 9832 /* We only care about the first newline character. */ 9833 common->nlmax = common->newline & 0xff; 9834 } 9835 9836 if (common->nltype == NLTYPE_FIXED) 9837 common->nlmin = common->newline & 0xff; 9838 else 9839 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL; 9840 9841 if (common->bsr_nltype == NLTYPE_ANY) 9842 common->bsr_nlmax = 0x2029; 9843 else 9844 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL; 9845 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL; 9846 } 9847#endif /* SUPPORT_UTF */ 9848ccend = bracketend(common->start); 9849 9850/* Calculate the local space size on the stack. */ 9851common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw); 9852common->optimized_cbracket = (pcre_uint8 *)SLJIT_MALLOC(re->top_bracket + 1); 9853if (!common->optimized_cbracket) 9854 return; 9855#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1 9856memset(common->optimized_cbracket, 0, re->top_bracket + 1); 9857#else 9858memset(common->optimized_cbracket, 1, re->top_bracket + 1); 9859#endif 9860 9861SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET); 9862#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2 9863common->capture_last_ptr = common->ovector_start; 9864common->ovector_start += sizeof(sljit_sw); 9865#endif 9866if (!check_opcode_types(common, common->start, ccend)) 9867 { 9868 SLJIT_FREE(common->optimized_cbracket); 9869 return; 9870 } 9871 9872/* Checking flags and updating ovector_start. 
*/ 9873if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) 9874 { 9875 common->req_char_ptr = common->ovector_start; 9876 common->ovector_start += sizeof(sljit_sw); 9877 } 9878if (mode != JIT_COMPILE) 9879 { 9880 common->start_used_ptr = common->ovector_start; 9881 common->ovector_start += sizeof(sljit_sw); 9882 if (mode == JIT_PARTIAL_SOFT_COMPILE) 9883 { 9884 common->hit_start = common->ovector_start; 9885 common->ovector_start += 2 * sizeof(sljit_sw); 9886 } 9887 else 9888 { 9889 SLJIT_ASSERT(mode == JIT_PARTIAL_HARD_COMPILE); 9890 common->needs_start_ptr = TRUE; 9891 } 9892 } 9893if ((re->options & PCRE_FIRSTLINE) != 0) 9894 { 9895 common->first_line_end = common->ovector_start; 9896 common->ovector_start += sizeof(sljit_sw); 9897 } 9898#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD 9899common->control_head_ptr = 1; 9900#endif 9901if (common->control_head_ptr != 0) 9902 { 9903 common->control_head_ptr = common->ovector_start; 9904 common->ovector_start += sizeof(sljit_sw); 9905 } 9906if (common->needs_start_ptr && common->has_set_som) 9907 { 9908 /* Saving the real start pointer is necessary. */ 9909 common->start_ptr = common->ovector_start; 9910 common->ovector_start += sizeof(sljit_sw); 9911 } 9912else 9913 common->needs_start_ptr = FALSE; 9914 9915/* Aligning ovector to even number of sljit words. */ 9916if ((common->ovector_start & sizeof(sljit_sw)) != 0) 9917 common->ovector_start += sizeof(sljit_sw); 9918 9919if (common->start_ptr == 0) 9920 common->start_ptr = OVECTOR(0); 9921 9922/* Capturing brackets cannot be optimized if callouts are allowed. 
*/ 9923if (common->capture_last_ptr != 0) 9924 memset(common->optimized_cbracket, 0, re->top_bracket + 1); 9925 9926SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); 9927common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); 9928 9929total_length = ccend - common->start; 9930common->private_data_ptrs = (sljit_si *)SLJIT_MALLOC(total_length * (sizeof(sljit_si) + (common->has_then ? 1 : 0))); 9931if (!common->private_data_ptrs) 9932 { 9933 SLJIT_FREE(common->optimized_cbracket); 9934 return; 9935 } 9936memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_si)); 9937 9938private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); 9939set_private_data_ptrs(common, &private_data_size, ccend); 9940if (private_data_size > SLJIT_MAX_LOCAL_SIZE) 9941 { 9942 SLJIT_FREE(common->private_data_ptrs); 9943 SLJIT_FREE(common->optimized_cbracket); 9944 return; 9945 } 9946 9947if (common->has_then) 9948 { 9949 common->then_offsets = (pcre_uint8 *)(common->private_data_ptrs + total_length); 9950 memset(common->then_offsets, 0, total_length); 9951 set_then_offsets(common, common->start, NULL); 9952 } 9953 9954if (common->read_only_data_size > 0) 9955 { 9956 common->read_only_data = (sljit_uw *)SLJIT_MALLOC(common->read_only_data_size); 9957 if (common->read_only_data == NULL) 9958 { 9959 SLJIT_FREE(common->optimized_cbracket); 9960 SLJIT_FREE(common->private_data_ptrs); 9961 return; 9962 } 9963 common->read_only_data_ptr = common->read_only_data; 9964 } 9965 9966compiler = sljit_create_compiler(); 9967if (!compiler) 9968 { 9969 SLJIT_FREE(common->optimized_cbracket); 9970 SLJIT_FREE(common->private_data_ptrs); 9971 if (common->read_only_data) 9972 SLJIT_FREE(common->read_only_data); 9973 return; 9974 } 9975common->compiler = compiler; 9976 9977/* Main pcre_jit_exec entry. */ 9978sljit_emit_enter(compiler, 0, 1, 5, 5, 0, 0, private_data_size); 9979 9980/* Register init. 
*/ 9981reset_ovector(common, (re->top_bracket + 1) * 2); 9982if (common->req_char_ptr != 0) 9983 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0); 9984 9985OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0); 9986OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0); 9987OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); 9988OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end)); 9989OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); 9990OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match)); 9991OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base)); 9992OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit)); 9993OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0); 9994 9995if (mode == JIT_PARTIAL_SOFT_COMPILE) 9996 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); 9997if (common->mark_ptr != 0) 9998 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0); 9999if (common->control_head_ptr != 0) 10000 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); 10001 10002/* Main part of the matching */ 10003if ((re->options & PCRE_ANCHORED) == 0) 10004 { 10005 mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0); 10006 continue_match_label = LABEL(); 10007 /* Forward search if possible. */ 10008 if ((re->options & PCRE_NO_START_OPTIMIZE) == 0) 10009 { 10010 if (mode == JIT_COMPILE && fast_forward_first_n_chars(common, (re->options & PCRE_FIRSTLINE) != 0)) 10011 { 10012 /* If read_only_data is reallocated, we might have an allocation failure. 
*/ 10013 if (common->read_only_data_size > 0 && common->read_only_data == NULL) 10014 { 10015 sljit_free_compiler(compiler); 10016 SLJIT_FREE(common->optimized_cbracket); 10017 SLJIT_FREE(common->private_data_ptrs); 10018 return; 10019 } 10020 } 10021 else if ((re->flags & PCRE_FIRSTSET) != 0) 10022 fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0); 10023 else if ((re->flags & PCRE_STARTLINE) != 0) 10024 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0); 10025 else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) 10026 fast_forward_start_bits(common, study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); 10027 } 10028 } 10029else 10030 continue_match_label = LABEL(); 10031 10032if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) 10033 { 10034 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); 10035 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength)); 10036 minlength_check_failed = CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0); 10037 } 10038if (common->req_char_ptr != 0) 10039 reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0); 10040 10041/* Store the current STR_PTR in OVECTOR(0). */ 10042OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); 10043/* Copy the limit of allowed recursions. 
*/ 10044OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH); 10045if (common->capture_last_ptr != 0) 10046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1); 10047 10048if (common->needs_start_ptr) 10049 { 10050 SLJIT_ASSERT(common->start_ptr != OVECTOR(0)); 10051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0); 10052 } 10053else 10054 SLJIT_ASSERT(common->start_ptr == OVECTOR(0)); 10055 10056/* Copy the beginning of the string. */ 10057if (mode == JIT_PARTIAL_SOFT_COMPILE) 10058 { 10059 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); 10060 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); 10061 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0); 10062 JUMPHERE(jump); 10063 } 10064else if (mode == JIT_PARTIAL_HARD_COMPILE) 10065 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); 10066 10067compile_matchingpath(common, common->start, ccend, &rootbacktrack); 10068if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 10069 { 10070 sljit_free_compiler(compiler); 10071 SLJIT_FREE(common->optimized_cbracket); 10072 SLJIT_FREE(common->private_data_ptrs); 10073 if (common->read_only_data) 10074 SLJIT_FREE(common->read_only_data); 10075 return; 10076 } 10077 10078if (common->might_be_empty) 10079 { 10080 empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); 10081 empty_match_found_label = LABEL(); 10082 } 10083 10084common->accept_label = LABEL(); 10085if (common->accept != NULL) 10086 set_jumps(common->accept, common->accept_label); 10087 10088/* This means we have a match. Update the ovector. 
*/ 10089copy_ovector(common, re->top_bracket + 1); 10090common->quit_label = common->forced_quit_label = LABEL(); 10091if (common->quit != NULL) 10092 set_jumps(common->quit, common->quit_label); 10093if (common->forced_quit != NULL) 10094 set_jumps(common->forced_quit, common->forced_quit_label); 10095if (minlength_check_failed != NULL) 10096 SET_LABEL(minlength_check_failed, common->forced_quit_label); 10097sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); 10098 10099if (mode != JIT_COMPILE) 10100 { 10101 common->partialmatchlabel = LABEL(); 10102 set_jumps(common->partialmatch, common->partialmatchlabel); 10103 return_with_partial_match(common, common->quit_label); 10104 } 10105 10106if (common->might_be_empty) 10107 empty_match_backtrack_label = LABEL(); 10108compile_backtrackingpath(common, rootbacktrack.top); 10109if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 10110 { 10111 sljit_free_compiler(compiler); 10112 SLJIT_FREE(common->optimized_cbracket); 10113 SLJIT_FREE(common->private_data_ptrs); 10114 if (common->read_only_data) 10115 SLJIT_FREE(common->read_only_data); 10116 return; 10117 } 10118 10119SLJIT_ASSERT(rootbacktrack.prev == NULL); 10120reset_match_label = LABEL(); 10121 10122if (mode == JIT_PARTIAL_SOFT_COMPILE) 10123 { 10124 /* Update hit_start only in the first time. */ 10125 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); 10126 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr); 10127 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); 10128 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0); 10129 JUMPHERE(jump); 10130 } 10131 10132/* Check we have remaining characters. 
*/ 10133if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0) 10134 { 10135 SLJIT_ASSERT(common->first_line_end != 0); 10136 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end); 10137 } 10138 10139OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); 10140 10141if ((re->options & PCRE_ANCHORED) == 0) 10142 { 10143 if (common->ff_newline_shortcut != NULL) 10144 { 10145 if ((re->options & PCRE_FIRSTLINE) == 0) 10146 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut); 10147 /* There cannot be more newlines here. */ 10148 } 10149 else 10150 { 10151 if ((re->options & PCRE_FIRSTLINE) == 0) 10152 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop_label); 10153 else 10154 CMPTO(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0, mainloop_label); 10155 } 10156 } 10157 10158/* No more remaining characters. */ 10159if (reqbyte_notfound != NULL) 10160 JUMPHERE(reqbyte_notfound); 10161 10162if (mode == JIT_PARTIAL_SOFT_COMPILE) 10163 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel); 10164 10165OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); 10166JUMPTO(SLJIT_JUMP, common->quit_label); 10167 10168flush_stubs(common); 10169 10170if (common->might_be_empty) 10171 { 10172 JUMPHERE(empty_match); 10173 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 10174 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); 10175 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label); 10176 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); 10177 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label); 10178 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); 10179 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label); 10180 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label); 10181 } 10182 10183common->currententry 
= common->entries; 10184common->local_exit = TRUE; 10185quit_label = common->quit_label; 10186while (common->currententry != NULL) 10187 { 10188 /* Might add new entries. */ 10189 compile_recurse(common); 10190 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) 10191 { 10192 sljit_free_compiler(compiler); 10193 SLJIT_FREE(common->optimized_cbracket); 10194 SLJIT_FREE(common->private_data_ptrs); 10195 if (common->read_only_data) 10196 SLJIT_FREE(common->read_only_data); 10197 return; 10198 } 10199 flush_stubs(common); 10200 common->currententry = common->currententry->next; 10201 } 10202common->local_exit = FALSE; 10203common->quit_label = quit_label; 10204 10205/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */ 10206/* This is a (really) rare case. */ 10207set_jumps(common->stackalloc, LABEL()); 10208/* RETURN_ADDR is not a saved register. */ 10209sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); 10210OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0); 10211OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 10212OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); 10213OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0); 10214OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE); 10215 10216sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); 10217jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); 10218OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); 10219OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); 10220OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top)); 10221OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit)); 10222OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); 10223sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); 10224 10225/* 
Allocation failed. */ 10226JUMPHERE(jump); 10227/* We break the return address cache here, but this is a really rare case. */ 10228OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT); 10229JUMPTO(SLJIT_JUMP, common->quit_label); 10230 10231/* Call limit reached. */ 10232set_jumps(common->calllimit, LABEL()); 10233OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT); 10234JUMPTO(SLJIT_JUMP, common->quit_label); 10235 10236if (common->revertframes != NULL) 10237 { 10238 set_jumps(common->revertframes, LABEL()); 10239 do_revertframes(common); 10240 } 10241if (common->wordboundary != NULL) 10242 { 10243 set_jumps(common->wordboundary, LABEL()); 10244 check_wordboundary(common); 10245 } 10246if (common->anynewline != NULL) 10247 { 10248 set_jumps(common->anynewline, LABEL()); 10249 check_anynewline(common); 10250 } 10251if (common->hspace != NULL) 10252 { 10253 set_jumps(common->hspace, LABEL()); 10254 check_hspace(common); 10255 } 10256if (common->vspace != NULL) 10257 { 10258 set_jumps(common->vspace, LABEL()); 10259 check_vspace(common); 10260 } 10261if (common->casefulcmp != NULL) 10262 { 10263 set_jumps(common->casefulcmp, LABEL()); 10264 do_casefulcmp(common); 10265 } 10266if (common->caselesscmp != NULL) 10267 { 10268 set_jumps(common->caselesscmp, LABEL()); 10269 do_caselesscmp(common); 10270 } 10271if (common->reset_match != NULL) 10272 { 10273 set_jumps(common->reset_match, LABEL()); 10274 do_reset_match(common, (re->top_bracket + 1) * 2); 10275 CMPTO(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label); 10276 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); 10277 JUMPTO(SLJIT_JUMP, reset_match_label); 10278 } 10279#ifdef SUPPORT_UTF 10280#ifdef COMPILE_PCRE8 10281if (common->utfreadchar != NULL) 10282 { 10283 set_jumps(common->utfreadchar, LABEL()); 10284 do_utfreadchar(common); 10285 } 10286if (common->utfreadchar16 != NULL) 10287 { 10288 set_jumps(common->utfreadchar16, LABEL()); 10289 do_utfreadchar16(common); 10290 } 
10291if (common->utfreadtype8 != NULL) 10292 { 10293 set_jumps(common->utfreadtype8, LABEL()); 10294 do_utfreadtype8(common); 10295 } 10296#endif /* COMPILE_PCRE8 */ 10297#endif /* SUPPORT_UTF */ 10298#ifdef SUPPORT_UCP 10299if (common->getucd != NULL) 10300 { 10301 set_jumps(common->getucd, LABEL()); 10302 do_getucd(common); 10303 } 10304#endif 10305 10306SLJIT_ASSERT(common->read_only_data + (common->read_only_data_size >> SLJIT_WORD_SHIFT) == common->read_only_data_ptr); 10307SLJIT_FREE(common->optimized_cbracket); 10308SLJIT_FREE(common->private_data_ptrs); 10309 10310executable_func = sljit_generate_code(compiler); 10311executable_size = sljit_get_generated_code_size(compiler); 10312label_addr = common->label_addrs; 10313while (label_addr != NULL) 10314 { 10315 *label_addr->update_addr = sljit_get_label_addr(label_addr->label); 10316 label_addr = label_addr->next; 10317 } 10318sljit_free_compiler(compiler); 10319if (executable_func == NULL) 10320 { 10321 if (common->read_only_data) 10322 SLJIT_FREE(common->read_only_data); 10323 return; 10324 } 10325 10326/* Reuse the function descriptor if possible. */ 10327if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL) 10328 functions = (executable_functions *)extra->executable_jit; 10329else 10330 { 10331 /* Note: If your memory-checker has flagged the allocation below as a 10332 * memory leak, it is probably because you either forgot to call 10333 * pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or 10334 * pcre16_extra) object, or you called said function after having 10335 * cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field 10336 * of the object. (The function will only free the JIT data if the 10337 * bit remains set, as the bit indicates that the pointer to the data 10338 * is valid.) 
10339 */ 10340 functions = SLJIT_MALLOC(sizeof(executable_functions)); 10341 if (functions == NULL) 10342 { 10343 /* This case is highly unlikely since we just recently 10344 freed a lot of memory. Not impossible though. */ 10345 sljit_free_code(executable_func); 10346 if (common->read_only_data) 10347 SLJIT_FREE(common->read_only_data); 10348 return; 10349 } 10350 memset(functions, 0, sizeof(executable_functions)); 10351 functions->top_bracket = (re->top_bracket + 1) * 2; 10352 functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0; 10353 extra->executable_jit = functions; 10354 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT; 10355 } 10356 10357functions->executable_funcs[mode] = executable_func; 10358functions->read_only_data[mode] = common->read_only_data; 10359functions->executable_sizes[mode] = executable_size; 10360} 10361 10362static int jit_machine_stack_exec(jit_arguments *arguments, void* executable_func) 10363{ 10364union { 10365 void* executable_func; 10366 jit_function call_executable_func; 10367} convert_executable_func; 10368pcre_uint8 local_space[MACHINE_STACK_SIZE]; 10369struct sljit_stack local_stack; 10370 10371local_stack.top = (sljit_sw)&local_space; 10372local_stack.base = local_stack.top; 10373local_stack.limit = local_stack.base + MACHINE_STACK_SIZE; 10374local_stack.max_limit = local_stack.limit; 10375arguments->stack = &local_stack; 10376convert_executable_func.executable_func = executable_func; 10377return convert_executable_func.call_executable_func(arguments); 10378} 10379 10380int 10381PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject, 10382 int length, int start_offset, int options, int *offsets, int offset_count) 10383{ 10384executable_functions *functions = (executable_functions *)extra_data->executable_jit; 10385union { 10386 void* executable_func; 10387 jit_function call_executable_func; 10388} convert_executable_func; 10389jit_arguments arguments; 10390int max_offset_count; 10391int retval; 
10392int mode = JIT_COMPILE; 10393 10394if ((options & PCRE_PARTIAL_HARD) != 0) 10395 mode = JIT_PARTIAL_HARD_COMPILE; 10396else if ((options & PCRE_PARTIAL_SOFT) != 0) 10397 mode = JIT_PARTIAL_SOFT_COMPILE; 10398 10399if (functions->executable_funcs[mode] == NULL) 10400 return PCRE_ERROR_JIT_BADOPTION; 10401 10402/* Sanity checks should be handled by pcre_exec. */ 10403arguments.str = subject + start_offset; 10404arguments.begin = subject; 10405arguments.end = subject + length; 10406arguments.mark_ptr = NULL; 10407/* JIT decreases this value less frequently than the interpreter. */ 10408arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit); 10409if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match) 10410 arguments.limit_match = functions->limit_match; 10411arguments.notbol = (options & PCRE_NOTBOL) != 0; 10412arguments.noteol = (options & PCRE_NOTEOL) != 0; 10413arguments.notempty = (options & PCRE_NOTEMPTY) != 0; 10414arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; 10415arguments.offsets = offsets; 10416arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL; 10417arguments.real_offset_count = offset_count; 10418 10419/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of 10420the output vector for storing captured strings, with the remainder used as 10421workspace. We don't need the workspace here. For compatibility, we limit the 10422number of captured strings in the same way as pcre_exec(), so that the user 10423gets the same result with and without JIT. 
*/ 10424 10425if (offset_count != 2) 10426 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3; 10427max_offset_count = functions->top_bracket; 10428if (offset_count > max_offset_count) 10429 offset_count = max_offset_count; 10430arguments.offset_count = offset_count; 10431 10432if (functions->callback) 10433 arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata); 10434else 10435 arguments.stack = (struct sljit_stack *)functions->userdata; 10436 10437if (arguments.stack == NULL) 10438 retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]); 10439else 10440 { 10441 convert_executable_func.executable_func = functions->executable_funcs[mode]; 10442 retval = convert_executable_func.call_executable_func(&arguments); 10443 } 10444 10445if (retval * 2 > offset_count) 10446 retval = 0; 10447if ((extra_data->flags & PCRE_EXTRA_MARK) != 0) 10448 *(extra_data->mark) = arguments.mark_ptr; 10449 10450return retval; 10451} 10452 10453#if defined COMPILE_PCRE8 10454PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 10455pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data, 10456 PCRE_SPTR subject, int length, int start_offset, int options, 10457 int *offsets, int offset_count, pcre_jit_stack *stack) 10458#elif defined COMPILE_PCRE16 10459PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 10460pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, 10461 PCRE_SPTR16 subject, int length, int start_offset, int options, 10462 int *offsets, int offset_count, pcre16_jit_stack *stack) 10463#elif defined COMPILE_PCRE32 10464PCRE_EXP_DEFN int PCRE_CALL_CONVENTION 10465pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data, 10466 PCRE_SPTR32 subject, int length, int start_offset, int options, 10467 int *offsets, int offset_count, pcre32_jit_stack *stack) 10468#endif 10469{ 10470pcre_uchar *subject_ptr = (pcre_uchar *)subject; 10471executable_functions *functions = (executable_functions 
*)extra_data->executable_jit; 10472union { 10473 void* executable_func; 10474 jit_function call_executable_func; 10475} convert_executable_func; 10476jit_arguments arguments; 10477int max_offset_count; 10478int retval; 10479int mode = JIT_COMPILE; 10480 10481SLJIT_UNUSED_ARG(argument_re); 10482 10483/* Plausibility checks */ 10484if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION; 10485 10486if ((options & PCRE_PARTIAL_HARD) != 0) 10487 mode = JIT_PARTIAL_HARD_COMPILE; 10488else if ((options & PCRE_PARTIAL_SOFT) != 0) 10489 mode = JIT_PARTIAL_SOFT_COMPILE; 10490 10491if (functions->executable_funcs[mode] == NULL) 10492 return PCRE_ERROR_JIT_BADOPTION; 10493 10494/* Sanity checks should be handled by pcre_exec. */ 10495arguments.stack = (struct sljit_stack *)stack; 10496arguments.str = subject_ptr + start_offset; 10497arguments.begin = subject_ptr; 10498arguments.end = subject_ptr + length; 10499arguments.mark_ptr = NULL; 10500/* JIT decreases this value less frequently than the interpreter. */ 10501arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (pcre_uint32)(extra_data->match_limit); 10502if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match) 10503 arguments.limit_match = functions->limit_match; 10504arguments.notbol = (options & PCRE_NOTBOL) != 0; 10505arguments.noteol = (options & PCRE_NOTEOL) != 0; 10506arguments.notempty = (options & PCRE_NOTEMPTY) != 0; 10507arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; 10508arguments.offsets = offsets; 10509arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL; 10510arguments.real_offset_count = offset_count; 10511 10512/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of 10513the output vector for storing captured strings, with the remainder used as 10514workspace. We don't need the workspace here. 
For compatibility, we limit the 10515number of captured strings in the same way as pcre_exec(), so that the user 10516gets the same result with and without JIT. */ 10517 10518if (offset_count != 2) 10519 offset_count = ((offset_count - (offset_count % 3)) * 2) / 3; 10520max_offset_count = functions->top_bracket; 10521if (offset_count > max_offset_count) 10522 offset_count = max_offset_count; 10523arguments.offset_count = offset_count; 10524 10525convert_executable_func.executable_func = functions->executable_funcs[mode]; 10526retval = convert_executable_func.call_executable_func(&arguments); 10527 10528if (retval * 2 > offset_count) 10529 retval = 0; 10530if ((extra_data->flags & PCRE_EXTRA_MARK) != 0) 10531 *(extra_data->mark) = arguments.mark_ptr; 10532 10533return retval; 10534} 10535 10536void 10537PRIV(jit_free)(void *executable_funcs) 10538{ 10539int i; 10540executable_functions *functions = (executable_functions *)executable_funcs; 10541for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) 10542 { 10543 if (functions->executable_funcs[i] != NULL) 10544 sljit_free_code(functions->executable_funcs[i]); 10545 if (functions->read_only_data[i] != NULL) 10546 SLJIT_FREE(functions->read_only_data[i]); 10547 } 10548SLJIT_FREE(functions); 10549} 10550 10551int 10552PRIV(jit_get_size)(void *executable_funcs) 10553{ 10554int i; 10555sljit_uw size = 0; 10556sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes; 10557for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) 10558 size += executable_sizes[i]; 10559return (int)size; 10560} 10561 10562const char* 10563PRIV(jit_get_target)(void) 10564{ 10565return sljit_get_platform_name(); 10566} 10567 10568#if defined COMPILE_PCRE8 10569PCRE_EXP_DECL pcre_jit_stack * 10570pcre_jit_stack_alloc(int startsize, int maxsize) 10571#elif defined COMPILE_PCRE16 10572PCRE_EXP_DECL pcre16_jit_stack * 10573pcre16_jit_stack_alloc(int startsize, int maxsize) 10574#elif defined COMPILE_PCRE32 10575PCRE_EXP_DECL 
pcre32_jit_stack * 10576pcre32_jit_stack_alloc(int startsize, int maxsize) 10577#endif 10578{ 10579if (startsize < 1 || maxsize < 1) 10580 return NULL; 10581if (startsize > maxsize) 10582 startsize = maxsize; 10583startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); 10584maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); 10585return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize); 10586} 10587 10588#if defined COMPILE_PCRE8 10589PCRE_EXP_DECL void 10590pcre_jit_stack_free(pcre_jit_stack *stack) 10591#elif defined COMPILE_PCRE16 10592PCRE_EXP_DECL void 10593pcre16_jit_stack_free(pcre16_jit_stack *stack) 10594#elif defined COMPILE_PCRE32 10595PCRE_EXP_DECL void 10596pcre32_jit_stack_free(pcre32_jit_stack *stack) 10597#endif 10598{ 10599sljit_free_stack((struct sljit_stack *)stack); 10600} 10601 10602#if defined COMPILE_PCRE8 10603PCRE_EXP_DECL void 10604pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) 10605#elif defined COMPILE_PCRE16 10606PCRE_EXP_DECL void 10607pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) 10608#elif defined COMPILE_PCRE32 10609PCRE_EXP_DECL void 10610pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata) 10611#endif 10612{ 10613executable_functions *functions; 10614if (extra != NULL && 10615 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && 10616 extra->executable_jit != NULL) 10617 { 10618 functions = (executable_functions *)extra->executable_jit; 10619 functions->callback = callback; 10620 functions->userdata = userdata; 10621 } 10622} 10623 10624#if defined COMPILE_PCRE8 10625PCRE_EXP_DECL void 10626pcre_jit_free_unused_memory(void) 10627#elif defined COMPILE_PCRE16 10628PCRE_EXP_DECL void 10629pcre16_jit_free_unused_memory(void) 10630#elif defined COMPILE_PCRE32 10631PCRE_EXP_DECL void 10632pcre32_jit_free_unused_memory(void) 10633#endif 10634{ 
10635sljit_free_unused_memory_exec(); 10636} 10637 10638#else /* SUPPORT_JIT */ 10639 10640/* These are dummy functions to avoid linking errors when JIT support is not 10641being compiled. */ 10642 10643#if defined COMPILE_PCRE8 10644PCRE_EXP_DECL pcre_jit_stack * 10645pcre_jit_stack_alloc(int startsize, int maxsize) 10646#elif defined COMPILE_PCRE16 10647PCRE_EXP_DECL pcre16_jit_stack * 10648pcre16_jit_stack_alloc(int startsize, int maxsize) 10649#elif defined COMPILE_PCRE32 10650PCRE_EXP_DECL pcre32_jit_stack * 10651pcre32_jit_stack_alloc(int startsize, int maxsize) 10652#endif 10653{ 10654(void)startsize; 10655(void)maxsize; 10656return NULL; 10657} 10658 10659#if defined COMPILE_PCRE8 10660PCRE_EXP_DECL void 10661pcre_jit_stack_free(pcre_jit_stack *stack) 10662#elif defined COMPILE_PCRE16 10663PCRE_EXP_DECL void 10664pcre16_jit_stack_free(pcre16_jit_stack *stack) 10665#elif defined COMPILE_PCRE32 10666PCRE_EXP_DECL void 10667pcre32_jit_stack_free(pcre32_jit_stack *stack) 10668#endif 10669{ 10670(void)stack; 10671} 10672 10673#if defined COMPILE_PCRE8 10674PCRE_EXP_DECL void 10675pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) 10676#elif defined COMPILE_PCRE16 10677PCRE_EXP_DECL void 10678pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) 10679#elif defined COMPILE_PCRE32 10680PCRE_EXP_DECL void 10681pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata) 10682#endif 10683{ 10684(void)extra; 10685(void)callback; 10686(void)userdata; 10687} 10688 10689#if defined COMPILE_PCRE8 10690PCRE_EXP_DECL void 10691pcre_jit_free_unused_memory(void) 10692#elif defined COMPILE_PCRE16 10693PCRE_EXP_DECL void 10694pcre16_jit_free_unused_memory(void) 10695#elif defined COMPILE_PCRE32 10696PCRE_EXP_DECL void 10697pcre32_jit_free_unused_memory(void) 10698#endif 10699{ 10700} 10701 10702#endif 10703 10704/* End of pcre_jit_compile.c */ 10705