sre_lib.h revision 02eae6b1f23d1c5ea87816f1f8d15cdab2dfaf0a
1/* 2 * Secret Labs' Regular Expression Engine 3 * 4 * regular expression matching engine 5 * 6 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. 7 * 8 * See the _sre.c file for information on usage and redistribution. 9 */ 10 11/* String matching engine */ 12 13/* This file is included three times, with different character settings */ 14 15LOCAL(int) 16SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) 17{ 18 /* check if pointer is at given position */ 19 20 Py_ssize_t thisp, thatp; 21 22 switch (at) { 23 24 case SRE_AT_BEGINNING: 25 case SRE_AT_BEGINNING_STRING: 26 return ((void*) ptr == state->beginning); 27 28 case SRE_AT_BEGINNING_LINE: 29 return ((void*) ptr == state->beginning || 30 SRE_IS_LINEBREAK((int) ptr[-1])); 31 32 case SRE_AT_END: 33 return (((SRE_CHAR *)state->end - ptr == 1 && 34 SRE_IS_LINEBREAK((int) ptr[0])) || 35 ((void*) ptr == state->end)); 36 37 case SRE_AT_END_LINE: 38 return ((void*) ptr == state->end || 39 SRE_IS_LINEBREAK((int) ptr[0])); 40 41 case SRE_AT_END_STRING: 42 return ((void*) ptr == state->end); 43 44 case SRE_AT_BOUNDARY: 45 if (state->beginning == state->end) 46 return 0; 47 thatp = ((void*) ptr > state->beginning) ? 48 SRE_IS_WORD((int) ptr[-1]) : 0; 49 thisp = ((void*) ptr < state->end) ? 50 SRE_IS_WORD((int) ptr[0]) : 0; 51 return thisp != thatp; 52 53 case SRE_AT_NON_BOUNDARY: 54 if (state->beginning == state->end) 55 return 0; 56 thatp = ((void*) ptr > state->beginning) ? 57 SRE_IS_WORD((int) ptr[-1]) : 0; 58 thisp = ((void*) ptr < state->end) ? 59 SRE_IS_WORD((int) ptr[0]) : 0; 60 return thisp == thatp; 61 62 case SRE_AT_LOC_BOUNDARY: 63 if (state->beginning == state->end) 64 return 0; 65 thatp = ((void*) ptr > state->beginning) ? 66 SRE_LOC_IS_WORD((int) ptr[-1]) : 0; 67 thisp = ((void*) ptr < state->end) ? 68 SRE_LOC_IS_WORD((int) ptr[0]) : 0; 69 return thisp != thatp; 70 71 case SRE_AT_LOC_NON_BOUNDARY: 72 if (state->beginning == state->end) 73 return 0; 74 thatp = ((void*) ptr > state->beginning) ? 75 SRE_LOC_IS_WORD((int) ptr[-1]) : 0; 76 thisp = ((void*) ptr < state->end) ? 77 SRE_LOC_IS_WORD((int) ptr[0]) : 0; 78 return thisp == thatp; 79 80 case SRE_AT_UNI_BOUNDARY: 81 if (state->beginning == state->end) 82 return 0; 83 thatp = ((void*) ptr > state->beginning) ? 84 SRE_UNI_IS_WORD((int) ptr[-1]) : 0; 85 thisp = ((void*) ptr < state->end) ? 86 SRE_UNI_IS_WORD((int) ptr[0]) : 0; 87 return thisp != thatp; 88 89 case SRE_AT_UNI_NON_BOUNDARY: 90 if (state->beginning == state->end) 91 return 0; 92 thatp = ((void*) ptr > state->beginning) ? 93 SRE_UNI_IS_WORD((int) ptr[-1]) : 0; 94 thisp = ((void*) ptr < state->end) ? 95 SRE_UNI_IS_WORD((int) ptr[0]) : 0; 96 return thisp == thatp; 97 98 } 99 100 return 0; 101} 102 103LOCAL(int) 104SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch) 105{ 106 /* check if character is a member of the given set */ 107 108 int ok = 1; 109 110 for (;;) { 111 switch (*set++) { 112 113 case SRE_OP_FAILURE: 114 return !ok; 115 116 case SRE_OP_LITERAL: 117 /* <LITERAL> <code> */ 118 if (ch == set[0]) 119 return ok; 120 set++; 121 break; 122 123 case SRE_OP_CATEGORY: 124 /* <CATEGORY> <code> */ 125 if (sre_category(set[0], (int) ch)) 126 return ok; 127 set++; 128 break; 129 130 case SRE_OP_CHARSET: 131 /* <CHARSET> <bitmap> */ 132 if (ch < 256 && 133 (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) 134 return ok; 135 set += 256/SRE_CODE_BITS; 136 break; 137 138 case SRE_OP_RANGE: 139 /* <RANGE> <lower> <upper> */ 140 if (set[0] <= ch && ch <= set[1]) 141 return ok; 142 set += 2; 143 break; 144 145 case SRE_OP_RANGE_IGNORE: 146 /* <RANGE_IGNORE> <lower> <upper> */ 147 { 148 SRE_CODE uch; 149 /* ch is already lower cased */ 150 if (set[0] <= ch && ch <= set[1]) 151 return ok; 152 uch = state->upper(ch); 153 if (set[0] <= uch && uch <= set[1]) 154 return ok; 155 set += 2; 156 break; 157 } 158 159 case SRE_OP_NEGATE: 160 ok = !ok; 161 break; 162 163 case SRE_OP_BIGCHARSET: 164 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ 165 { 166 Py_ssize_t count, block; 167 count = *(set++); 168 169 if (ch < 0x10000u) 170 block = ((unsigned char*)set)[ch >> 8]; 171 else 172 block = -1; 173 set += 256/sizeof(SRE_CODE); 174 if (block >=0 && 175 (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & 176 (1u << (ch & (SRE_CODE_BITS-1))))) 177 return ok; 178 set += count * (256/SRE_CODE_BITS); 179 break; 180 } 181 182 default: 183 /* internal error -- there's not much we can do about it 184 here, so let's just pretend it didn't match... */ 185 return 0; 186 } 187 } 188} 189 190LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all); 191 192LOCAL(Py_ssize_t) 193SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) 194{ 195 SRE_CODE chr; 196 SRE_CHAR c; 197 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr; 198 SRE_CHAR* end = (SRE_CHAR *)state->end; 199 Py_ssize_t i; 200 201 /* adjust end */ 202 if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) 203 end = ptr + maxcount; 204 205 switch (pattern[0]) { 206 207 case SRE_OP_IN: 208 /* repeated set */ 209 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); 210 while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) 211 ptr++; 212 break; 213 214 case SRE_OP_ANY: 215 /* repeated dot wildcard. */ 216 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); 217 while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) 218 ptr++; 219 break; 220 221 case SRE_OP_ANY_ALL: 222 /* repeated dot wildcard. skip to the end of the target 223 string, and backtrack from there */ 224 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); 225 ptr = end; 226 break; 227 228 case SRE_OP_LITERAL: 229 /* repeated literal */ 230 chr = pattern[1]; 231 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); 232 c = (SRE_CHAR) chr; 233#if SIZEOF_SRE_CHAR < 4 234 if ((SRE_CODE) c != chr) 235 ; /* literal can't match: doesn't fit in char width */ 236 else 237#endif 238 while (ptr < end && *ptr == c) 239 ptr++; 240 break; 241 242 case SRE_OP_LITERAL_IGNORE: 243 /* repeated literal */ 244 chr = pattern[1]; 245 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); 246 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr) 247 ptr++; 248 break; 249 250 case SRE_OP_NOT_LITERAL: 251 /* repeated non-literal */ 252 chr = pattern[1]; 253 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); 254 c = (SRE_CHAR) chr; 255#if SIZEOF_SRE_CHAR < 4 256 if ((SRE_CODE) c != chr) 257 ptr = end; /* literal can't match: doesn't fit in char width */ 258 else 259#endif 260 while (ptr < end && *ptr != c) 261 ptr++; 262 break; 263 264 case SRE_OP_NOT_LITERAL_IGNORE: 265 /* repeated non-literal */ 266 chr = pattern[1]; 267 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); 268 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr) 269 ptr++; 270 break; 271 272 default: 273 /* repeated single character pattern */ 274 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); 275 while ((SRE_CHAR*) state->ptr < end) { 276 i = SRE(match)(state, pattern, 0); 277 if (i < 0) 278 return i; 279 if (!i) 280 break; 281 } 282 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr, 283 (SRE_CHAR*) state->ptr - ptr)); 284 return (SRE_CHAR*) state->ptr - ptr; 285 } 286 287 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr, 288 ptr - (SRE_CHAR*) state->ptr)); 289 return ptr - (SRE_CHAR*) state->ptr; 290} 291 292#if 0 /* not used in this release */ 293LOCAL(int) 294SRE(info)(SRE_STATE* state, SRE_CODE* pattern) 295{ 296 /* check if an SRE_OP_INFO block matches at the current position. 297 returns the number of SRE_CODE objects to skip if successful, 0 298 if no match */ 299 300 SRE_CHAR* end = (SRE_CHAR*) state->end; 301 SRE_CHAR* ptr = (SRE_CHAR*) state->ptr; 302 Py_ssize_t i; 303 304 /* check minimal length */ 305 if (pattern[3] && end - ptr < pattern[3]) 306 return 0; 307 308 /* check known prefix */ 309 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) { 310 /* <length> <skip> <prefix data> <overlap data> */ 311 for (i = 0; i < pattern[5]; i++) 312 if ((SRE_CODE) ptr[i] != pattern[7 + i]) 313 return 0; 314 return pattern[0] + 2 * pattern[6]; 315 } 316 return pattern[0]; 317} 318#endif 319 320/* The macros below should be used to protect recursive SRE(match)() 321 * calls that *failed* and do *not* return immediately (IOW, those 322 * that will backtrack). Explaining: 323 * 324 * - Recursive SRE(match)() returned true: that's usually a success 325 * (besides atypical cases like ASSERT_NOT), therefore there's no 326 * reason to restore lastmark; 327 * 328 * - Recursive SRE(match)() returned false but the current SRE(match)() 329 * is returning to the caller: If the current SRE(match)() is the 330 * top function of the recursion, returning false will be a matching 331 * failure, and it doesn't matter where lastmark is pointing to. 332 * If it's *not* the top function, it will be a recursive SRE(match)() 333 * failure by itself, and the calling SRE(match)() will have to deal 334 * with the failure by the same rules explained here (it will restore 335 * lastmark by itself if necessary); 336 * 337 * - Recursive SRE(match)() returned false, and will continue the 338 * outside 'for' loop: must be protected when breaking, since the next 339 * OP could potentially depend on lastmark; 340 * 341 * - Recursive SRE(match)() returned false, and will be called again 342 * inside a local for/while loop: must be protected between each 343 * loop iteration, since the recursive SRE(match)() could do anything, 344 * and could potentially depend on lastmark. 345 * 346 * For more information, check the discussion at SF patch #712900. 347 */ 348#define LASTMARK_SAVE() \ 349 do { \ 350 ctx->lastmark = state->lastmark; \ 351 ctx->lastindex = state->lastindex; \ 352 } while (0) 353#define LASTMARK_RESTORE() \ 354 do { \ 355 state->lastmark = ctx->lastmark; \ 356 state->lastindex = ctx->lastindex; \ 357 } while (0) 358 359#define RETURN_ERROR(i) do { return i; } while(0) 360#define RETURN_FAILURE do { ret = 0; goto exit; } while(0) 361#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0) 362 363#define RETURN_ON_ERROR(i) \ 364 do { if (i < 0) RETURN_ERROR(i); } while (0) 365#define RETURN_ON_SUCCESS(i) \ 366 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0) 367#define RETURN_ON_FAILURE(i) \ 368 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0) 369 370#define SFY(x) #x 371 372#define DATA_STACK_ALLOC(state, type, ptr) \ 373do { \ 374 alloc_pos = state->data_stack_base; \ 375 TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \ 376 "(%" PY_FORMAT_SIZE_T "d)\n", \ 377 SFY(type), alloc_pos, sizeof(type))); \ 378 if (sizeof(type) > state->data_stack_size - alloc_pos) { \ 379 int j = data_stack_grow(state, sizeof(type)); \ 380 if (j < 0) return j; \ 381 if (ctx_pos != -1) \ 382 DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \ 383 } \ 384 ptr = (type*)(state->data_stack+alloc_pos); \ 385 state->data_stack_base += sizeof(type); \ 386} while (0) 387 388#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \ 389do { \ 390 TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", SFY(type), pos)); \ 391 ptr = (type*)(state->data_stack+pos); \ 392} while (0) 393 394#define DATA_STACK_PUSH(state, data, size) \ 395do { \ 396 TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \ 397 "(%" PY_FORMAT_SIZE_T "d)\n", \ 398 data, state->data_stack_base, size)); \ 399 if (size > state->data_stack_size - state->data_stack_base) { \ 400 int j = data_stack_grow(state, size); \ 401 if (j < 0) return j; \ 402 if (ctx_pos != -1) \ 403 DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \ 404 } \ 405 memcpy(state->data_stack+state->data_stack_base, data, size); \ 406 state->data_stack_base += size; \ 407} while (0) 408 409#define DATA_STACK_POP(state, data, size, discard) \ 410do { \ 411 TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \ 412 "(%" PY_FORMAT_SIZE_T "d)\n", \ 413 data, state->data_stack_base-size, size)); \ 414 memcpy(data, state->data_stack+state->data_stack_base-size, size); \ 415 if (discard) \ 416 state->data_stack_base -= size; \ 417} while (0) 418 419#define DATA_STACK_POP_DISCARD(state, size) \ 420do { \ 421 TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \ 422 "(%" PY_FORMAT_SIZE_T "d)\n", \ 423 state->data_stack_base-size, size)); \ 424 state->data_stack_base -= size; \ 425} while(0) 426 427#define DATA_PUSH(x) \ 428 DATA_STACK_PUSH(state, (x), sizeof(*(x))) 429#define DATA_POP(x) \ 430 DATA_STACK_POP(state, (x), sizeof(*(x)), 1) 431#define DATA_POP_DISCARD(x) \ 432 DATA_STACK_POP_DISCARD(state, sizeof(*(x))) 433#define DATA_ALLOC(t,p) \ 434 DATA_STACK_ALLOC(state, t, p) 435#define DATA_LOOKUP_AT(t,p,pos) \ 436 DATA_STACK_LOOKUP_AT(state,t,p,pos) 437 438#define MARK_PUSH(lastmark) \ 439 do if (lastmark > 0) { \ 440 i = lastmark; /* ctx->lastmark may change if reallocated */ \ 441 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \ 442 } while (0) 443#define MARK_POP(lastmark) \ 444 do if (lastmark > 0) { \ 445 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \ 446 } while (0) 447#define MARK_POP_KEEP(lastmark) \ 448 do if (lastmark > 0) { \ 449 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \ 450 } while (0) 451#define MARK_POP_DISCARD(lastmark) \ 452 do if (lastmark > 0) { \ 453 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \ 454 } while (0) 455 456#define JUMP_NONE 0 457#define JUMP_MAX_UNTIL_1 1 458#define JUMP_MAX_UNTIL_2 2 459#define JUMP_MAX_UNTIL_3 3 460#define JUMP_MIN_UNTIL_1 4 461#define JUMP_MIN_UNTIL_2 5 462#define JUMP_MIN_UNTIL_3 6 463#define JUMP_REPEAT 7 464#define JUMP_REPEAT_ONE_1 8 465#define JUMP_REPEAT_ONE_2 9 466#define JUMP_MIN_REPEAT_ONE 10 467#define JUMP_BRANCH 11 468#define JUMP_ASSERT 12 469#define JUMP_ASSERT_NOT 13 470 471#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, matchall) \ 472 DATA_ALLOC(SRE(match_context), nextctx); \ 473 nextctx->last_ctx_pos = ctx_pos; \ 474 nextctx->jump = jumpvalue; \ 475 nextctx->pattern = nextpattern; \ 476 nextctx->match_all = matchall; \ 477 ctx_pos = alloc_pos; \ 478 ctx = nextctx; \ 479 goto entrance; \ 480 jumplabel: \ 481 while (0) /* gcc doesn't like labels at end of scopes */ \ 482 483#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \ 484 DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->match_all) 485 486#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \ 487 DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0) 488 489typedef struct { 490 Py_ssize_t last_ctx_pos; 491 Py_ssize_t jump; 492 SRE_CHAR* ptr; 493 SRE_CODE* pattern; 494 Py_ssize_t count; 495 Py_ssize_t lastmark; 496 Py_ssize_t lastindex; 497 union { 498 SRE_CODE chr; 499 SRE_REPEAT* rep; 500 } u; 501 int match_all; 502} SRE(match_context); 503 504/* check if string matches the given pattern. returns <0 for 505 error, 0 for failure, and 1 for success */ 506LOCAL(Py_ssize_t) 507SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all) 508{ 509 SRE_CHAR* end = (SRE_CHAR *)state->end; 510 Py_ssize_t alloc_pos, ctx_pos = -1; 511 Py_ssize_t i, ret = 0; 512 Py_ssize_t jump; 513 unsigned int sigcount=0; 514 515 SRE(match_context)* ctx; 516 SRE(match_context)* nextctx; 517 518 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); 519 520 DATA_ALLOC(SRE(match_context), ctx); 521 ctx->last_ctx_pos = -1; 522 ctx->jump = JUMP_NONE; 523 ctx->pattern = pattern; 524 ctx->match_all = match_all; 525 ctx_pos = alloc_pos; 526 527entrance: 528 529 ctx->ptr = (SRE_CHAR *)state->ptr; 530 531 if (ctx->pattern[0] == SRE_OP_INFO) { 532 /* optimization info block */ 533 /* <INFO> <1=skip> <2=flags> <3=min> ... */ 534 if (ctx->pattern[3] && (Py_uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) { 535 TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, " 536 "need %" PY_FORMAT_SIZE_T "d)\n", 537 end - ctx->ptr, (Py_ssize_t) ctx->pattern[3])); 538 RETURN_FAILURE; 539 } 540 ctx->pattern += ctx->pattern[1] + 1; 541 } 542 543 for (;;) { 544 ++sigcount; 545 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals()) 546 RETURN_ERROR(SRE_ERROR_INTERRUPTED); 547 548 switch (*ctx->pattern++) { 549 550 case SRE_OP_MARK: 551 /* set mark */ 552 /* <MARK> <gid> */ 553 TRACE(("|%p|%p|MARK %d\n", ctx->pattern, 554 ctx->ptr, ctx->pattern[0])); 555 i = ctx->pattern[0]; 556 if (i & 1) 557 state->lastindex = i/2 + 1; 558 if (i > state->lastmark) { 559 /* state->lastmark is the highest valid index in the 560 state->mark array. If it is increased by more than 1, 561 the intervening marks must be set to NULL to signal 562 that these marks have not been encountered. */ 563 Py_ssize_t j = state->lastmark + 1; 564 while (j < i) 565 state->mark[j++] = NULL; 566 state->lastmark = i; 567 } 568 state->mark[i] = ctx->ptr; 569 ctx->pattern++; 570 break; 571 572 case SRE_OP_LITERAL: 573 /* match literal string */ 574 /* <LITERAL> <code> */ 575 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern, 576 ctx->ptr, *ctx->pattern)); 577 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0]) 578 RETURN_FAILURE; 579 ctx->pattern++; 580 ctx->ptr++; 581 break; 582 583 case SRE_OP_NOT_LITERAL: 584 /* match anything that is not literal character */ 585 /* <NOT_LITERAL> <code> */ 586 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern, 587 ctx->ptr, *ctx->pattern)); 588 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0]) 589 RETURN_FAILURE; 590 ctx->pattern++; 591 ctx->ptr++; 592 break; 593 594 case SRE_OP_SUCCESS: 595 /* end of pattern */ 596 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr)); 597 if (!ctx->match_all || ctx->ptr == state->end) { 598 state->ptr = ctx->ptr; 599 RETURN_SUCCESS; 600 } 601 RETURN_FAILURE; 602 603 case SRE_OP_AT: 604 /* match at given position */ 605 /* <AT> <code> */ 606 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern)); 607 if (!SRE(at)(state, ctx->ptr, *ctx->pattern)) 608 RETURN_FAILURE; 609 ctx->pattern++; 610 break; 611 612 case SRE_OP_CATEGORY: 613 /* match at given category */ 614 /* <CATEGORY> <code> */ 615 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern, 616 ctx->ptr, *ctx->pattern)); 617 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0])) 618 RETURN_FAILURE; 619 ctx->pattern++; 620 ctx->ptr++; 621 break; 622 623 case SRE_OP_ANY: 624 /* match anything (except a newline) */ 625 /* <ANY> */ 626 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr)); 627 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0])) 628 RETURN_FAILURE; 629 ctx->ptr++; 630 break; 631 632 case SRE_OP_ANY_ALL: 633 /* match anything */ 634 /* <ANY_ALL> */ 635 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr)); 636 if (ctx->ptr >= end) 637 RETURN_FAILURE; 638 ctx->ptr++; 639 break; 640 641 case SRE_OP_IN: 642 /* match set member (or non_member) */ 643 /* <IN> <skip> <set> */ 644 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr)); 645 if (ctx->ptr >= end || 646 !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr)) 647 RETURN_FAILURE; 648 ctx->pattern += ctx->pattern[0]; 649 ctx->ptr++; 650 break; 651 652 case SRE_OP_LITERAL_IGNORE: 653 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", 654 ctx->pattern, ctx->ptr, ctx->pattern[0])); 655 if (ctx->ptr >= end || 656 state->lower(*ctx->ptr) != state->lower(*ctx->pattern)) 657 RETURN_FAILURE; 658 ctx->pattern++; 659 ctx->ptr++; 660 break; 661 662 case SRE_OP_NOT_LITERAL_IGNORE: 663 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", 664 ctx->pattern, ctx->ptr, *ctx->pattern)); 665 if (ctx->ptr >= end || 666 state->lower(*ctx->ptr) == state->lower(*ctx->pattern)) 667 RETURN_FAILURE; 668 ctx->pattern++; 669 ctx->ptr++; 670 break; 671 672 case SRE_OP_IN_IGNORE: 673 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr)); 674 if (ctx->ptr >= end 675 || !SRE(charset)(state, ctx->pattern+1, 676 (SRE_CODE)state->lower(*ctx->ptr))) 677 RETURN_FAILURE; 678 ctx->pattern += ctx->pattern[0]; 679 ctx->ptr++; 680 break; 681 682 case SRE_OP_JUMP: 683 case SRE_OP_INFO: 684 /* jump forward */ 685 /* <JUMP> <offset> */ 686 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern, 687 ctx->ptr, ctx->pattern[0])); 688 ctx->pattern += ctx->pattern[0]; 689 break; 690 691 case SRE_OP_BRANCH: 692 /* alternation */ 693 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ 694 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr)); 695 LASTMARK_SAVE(); 696 ctx->u.rep = state->repeat; 697 if (ctx->u.rep) 698 MARK_PUSH(ctx->lastmark); 699 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) { 700 if (ctx->pattern[1] == SRE_OP_LITERAL && 701 (ctx->ptr >= end || 702 (SRE_CODE) *ctx->ptr != ctx->pattern[2])) 703 continue; 704 if (ctx->pattern[1] == SRE_OP_IN && 705 (ctx->ptr >= end || 706 !SRE(charset)(state, ctx->pattern + 3, 707 (SRE_CODE) *ctx->ptr))) 708 continue; 709 state->ptr = ctx->ptr; 710 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1); 711 if (ret) { 712 if (ctx->u.rep) 713 MARK_POP_DISCARD(ctx->lastmark); 714 RETURN_ON_ERROR(ret); 715 RETURN_SUCCESS; 716 } 717 if (ctx->u.rep) 718 MARK_POP_KEEP(ctx->lastmark); 719 LASTMARK_RESTORE(); 720 } 721 if (ctx->u.rep) 722 MARK_POP_DISCARD(ctx->lastmark); 723 RETURN_FAILURE; 724 725 case SRE_OP_REPEAT_ONE: 726 /* match repeated sequence (maximizing regexp) */ 727 728 /* this operator only works if the repeated item is 729 exactly one character wide, and we're not already 730 collecting backtracking points. for other cases, 731 use the MAX_REPEAT operator */ 732 733 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ 734 735 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, 736 ctx->pattern[1], ctx->pattern[2])); 737 738 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) 739 RETURN_FAILURE; /* cannot match */ 740 741 state->ptr = ctx->ptr; 742 743 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]); 744 RETURN_ON_ERROR(ret); 745 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); 746 ctx->count = ret; 747 ctx->ptr += ctx->count; 748 749 /* when we arrive here, count contains the number of 750 matches, and ctx->ptr points to the tail of the target 751 string. check if the rest of the pattern matches, 752 and backtrack if not. */ 753 754 if (ctx->count < (Py_ssize_t) ctx->pattern[1]) 755 RETURN_FAILURE; 756 757 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && 758 ctx->ptr == state->end) { 759 /* tail is empty. we're finished */ 760 state->ptr = ctx->ptr; 761 RETURN_SUCCESS; 762 } 763 764 LASTMARK_SAVE(); 765 766 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) { 767 /* tail starts with a literal. skip positions where 768 the rest of the pattern cannot possibly match */ 769 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1]; 770 for (;;) { 771 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] && 772 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) { 773 ctx->ptr--; 774 ctx->count--; 775 } 776 if (ctx->count < (Py_ssize_t) ctx->pattern[1]) 777 break; 778 state->ptr = ctx->ptr; 779 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, 780 ctx->pattern+ctx->pattern[0]); 781 if (ret) { 782 RETURN_ON_ERROR(ret); 783 RETURN_SUCCESS; 784 } 785 786 LASTMARK_RESTORE(); 787 788 ctx->ptr--; 789 ctx->count--; 790 } 791 792 } else { 793 /* general case */ 794 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) { 795 state->ptr = ctx->ptr; 796 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, 797 ctx->pattern+ctx->pattern[0]); 798 if (ret) { 799 RETURN_ON_ERROR(ret); 800 RETURN_SUCCESS; 801 } 802 ctx->ptr--; 803 ctx->count--; 804 LASTMARK_RESTORE(); 805 } 806 } 807 RETURN_FAILURE; 808 809 case SRE_OP_MIN_REPEAT_ONE: 810 /* match repeated sequence (minimizing regexp) */ 811 812 /* this operator only works if the repeated item is 813 exactly one character wide, and we're not already 814 collecting backtracking points. for other cases, 815 use the MIN_REPEAT operator */ 816 817 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ 818 819 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, 820 ctx->pattern[1], ctx->pattern[2])); 821 822 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) 823 RETURN_FAILURE; /* cannot match */ 824 825 state->ptr = ctx->ptr; 826 827 if (ctx->pattern[1] == 0) 828 ctx->count = 0; 829 else { 830 /* count using pattern min as the maximum */ 831 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]); 832 RETURN_ON_ERROR(ret); 833 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); 834 if (ret < (Py_ssize_t) ctx->pattern[1]) 835 /* didn't match minimum number of times */ 836 RETURN_FAILURE; 837 /* advance past minimum matches of repeat */ 838 ctx->count = ret; 839 ctx->ptr += ctx->count; 840 } 841 842 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && 843 (!match_all || ctx->ptr == state->end)) { 844 /* tail is empty. we're finished */ 845 state->ptr = ctx->ptr; 846 RETURN_SUCCESS; 847 848 } else { 849 /* general case */ 850 LASTMARK_SAVE(); 851 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT 852 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) { 853 state->ptr = ctx->ptr; 854 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, 855 ctx->pattern+ctx->pattern[0]); 856 if (ret) { 857 RETURN_ON_ERROR(ret); 858 RETURN_SUCCESS; 859 } 860 state->ptr = ctx->ptr; 861 ret = SRE(count)(state, ctx->pattern+3, 1); 862 RETURN_ON_ERROR(ret); 863 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); 864 if (ret == 0) 865 break; 866 assert(ret == 1); 867 ctx->ptr++; 868 ctx->count++; 869 LASTMARK_RESTORE(); 870 } 871 } 872 RETURN_FAILURE; 873 874 case SRE_OP_REPEAT: 875 /* create repeat context. all the hard work is done 876 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ 877 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */ 878 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr, 879 ctx->pattern[1], ctx->pattern[2])); 880 881 /* install new repeat context */ 882 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep)); 883 if (!ctx->u.rep) { 884 PyErr_NoMemory(); 885 RETURN_FAILURE; 886 } 887 ctx->u.rep->count = -1; 888 ctx->u.rep->pattern = ctx->pattern; 889 ctx->u.rep->prev = state->repeat; 890 ctx->u.rep->last_ptr = NULL; 891 state->repeat = ctx->u.rep; 892 893 state->ptr = ctx->ptr; 894 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]); 895 state->repeat = ctx->u.rep->prev; 896 PyObject_FREE(ctx->u.rep); 897 898 if (ret) { 899 RETURN_ON_ERROR(ret); 900 RETURN_SUCCESS; 901 } 902 RETURN_FAILURE; 903 904 case SRE_OP_MAX_UNTIL: 905 /* maximizing repeat */ 906 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ 907 908 /* FIXME: we probably need to deal with zero-width 909 matches in here... */ 910 911 ctx->u.rep = state->repeat; 912 if (!ctx->u.rep) 913 RETURN_ERROR(SRE_ERROR_STATE); 914 915 state->ptr = ctx->ptr; 916 917 ctx->count = ctx->u.rep->count+1; 918 919 TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern, 920 ctx->ptr, ctx->count)); 921 922 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { 923 /* not enough matches */ 924 ctx->u.rep->count = ctx->count; 925 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, 926 ctx->u.rep->pattern+3); 927 if (ret) { 928 RETURN_ON_ERROR(ret); 929 RETURN_SUCCESS; 930 } 931 ctx->u.rep->count = ctx->count-1; 932 state->ptr = ctx->ptr; 933 RETURN_FAILURE; 934 } 935 936 if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || 937 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && 938 state->ptr != ctx->u.rep->last_ptr) { 939 /* we may have enough matches, but if we can 940 match another item, do so */ 941 ctx->u.rep->count = ctx->count; 942 LASTMARK_SAVE(); 943 MARK_PUSH(ctx->lastmark); 944 /* zero-width match protection */ 945 DATA_PUSH(&ctx->u.rep->last_ptr); 946 ctx->u.rep->last_ptr = state->ptr; 947 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, 948 ctx->u.rep->pattern+3); 949 DATA_POP(&ctx->u.rep->last_ptr); 950 if (ret) { 951 MARK_POP_DISCARD(ctx->lastmark); 952 RETURN_ON_ERROR(ret); 953 RETURN_SUCCESS; 954 } 955 MARK_POP(ctx->lastmark); 956 LASTMARK_RESTORE(); 957 ctx->u.rep->count = ctx->count-1; 958 state->ptr = ctx->ptr; 959 } 960 961 /* cannot match more repeated items here. make sure the 962 tail matches */ 963 state->repeat = ctx->u.rep->prev; 964 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern); 965 RETURN_ON_SUCCESS(ret); 966 state->repeat = ctx->u.rep; 967 state->ptr = ctx->ptr; 968 RETURN_FAILURE; 969 970 case SRE_OP_MIN_UNTIL: 971 /* minimizing repeat */ 972 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ 973 974 ctx->u.rep = state->repeat; 975 if (!ctx->u.rep) 976 RETURN_ERROR(SRE_ERROR_STATE); 977 978 state->ptr = ctx->ptr; 979 980 ctx->count = ctx->u.rep->count+1; 981 982 TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern, 983 ctx->ptr, ctx->count, ctx->u.rep->pattern)); 984 985 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { 986 /* not enough matches */ 987 ctx->u.rep->count = ctx->count; 988 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, 989 ctx->u.rep->pattern+3); 990 if (ret) { 991 RETURN_ON_ERROR(ret); 992 RETURN_SUCCESS; 993 } 994 ctx->u.rep->count = ctx->count-1; 995 state->ptr = ctx->ptr; 996 RETURN_FAILURE; 997 } 998 999 LASTMARK_SAVE(); 1000 1001 /* see if the tail matches */ 1002 state->repeat = ctx->u.rep->prev; 1003 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern); 1004 if (ret) { 1005 RETURN_ON_ERROR(ret); 1006 RETURN_SUCCESS; 1007 } 1008 1009 state->repeat = ctx->u.rep; 1010 state->ptr = ctx->ptr; 1011 1012 LASTMARK_RESTORE(); 1013 1014 if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] 1015 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || 1016 state->ptr == ctx->u.rep->last_ptr) 1017 RETURN_FAILURE; 1018 1019 ctx->u.rep->count = ctx->count; 1020 /* zero-width match protection */ 1021 DATA_PUSH(&ctx->u.rep->last_ptr); 1022 ctx->u.rep->last_ptr = state->ptr; 1023 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, 1024 ctx->u.rep->pattern+3); 1025 DATA_POP(&ctx->u.rep->last_ptr); 1026 if (ret) { 1027 RETURN_ON_ERROR(ret); 1028 RETURN_SUCCESS; 1029 } 1030 ctx->u.rep->count = ctx->count-1; 1031 state->ptr = ctx->ptr; 1032 RETURN_FAILURE; 1033 1034 case SRE_OP_GROUPREF: 1035 /* match backreference */ 1036 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern, 1037 ctx->ptr, ctx->pattern[0])); 1038 i = ctx->pattern[0]; 1039 { 1040 Py_ssize_t groupref = i+i; 1041 if (groupref >= state->lastmark) { 1042 RETURN_FAILURE; 1043 } else { 1044 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; 1045 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; 1046 if (!p || !e || e < p) 1047 RETURN_FAILURE; 1048 while (p < e) { 1049 if (ctx->ptr >= end || *ctx->ptr != *p) 1050 RETURN_FAILURE; 1051 p++; 1052 ctx->ptr++; 1053 } 1054 } 1055 } 1056 ctx->pattern++; 1057 break; 1058 1059 case SRE_OP_GROUPREF_IGNORE: 1060 /* match backreference */ 1061 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern, 1062 ctx->ptr, ctx->pattern[0])); 1063 i = ctx->pattern[0]; 1064 { 1065 Py_ssize_t groupref = i+i; 1066 if (groupref >= state->lastmark) { 1067 RETURN_FAILURE; 1068 } else { 1069 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; 1070 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; 1071 if (!p || !e || e < p) 1072 RETURN_FAILURE; 1073 while (p < e) { 1074 if (ctx->ptr >= end || 1075 state->lower(*ctx->ptr) != state->lower(*p)) 1076 RETURN_FAILURE; 1077 p++; 1078 ctx->ptr++; 1079 } 1080 } 1081 } 1082 ctx->pattern++; 1083 break; 1084 1085 case SRE_OP_GROUPREF_EXISTS: 1086 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern, 1087 ctx->ptr, ctx->pattern[0])); 1088 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */ 1089 i = ctx->pattern[0]; 1090 { 1091 Py_ssize_t groupref = i+i; 1092 if (groupref >= state->lastmark) { 1093 ctx->pattern += ctx->pattern[1]; 1094 break; 1095 } else { 1096 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; 1097 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; 1098 if (!p || !e || e < p) { 1099 ctx->pattern += ctx->pattern[1]; 1100 break; 1101 } 1102 } 1103 } 1104 ctx->pattern += 2; 1105 break; 1106 1107 case SRE_OP_ASSERT: 1108 /* assert subpattern */ 1109 /* <ASSERT> <skip> <back> <pattern> */ 1110 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, 1111 ctx->ptr, ctx->pattern[1])); 1112 if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1]) 1113 RETURN_FAILURE; 1114 state->ptr = ctx->ptr - ctx->pattern[1]; 1115 DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2); 1116 RETURN_ON_FAILURE(ret); 1117 ctx->pattern += ctx->pattern[0]; 1118 break; 1119 1120 case SRE_OP_ASSERT_NOT: 1121 /* assert not subpattern */ 1122 /* <ASSERT_NOT> <skip> <back> <pattern> */ 1123 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, 1124 ctx->ptr, ctx->pattern[1])); 1125 if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) { 1126 state->ptr = ctx->ptr - ctx->pattern[1]; 1127 DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2); 1128 if (ret) { 1129 RETURN_ON_ERROR(ret); 1130 RETURN_FAILURE; 1131 } 1132 } 1133 ctx->pattern += ctx->pattern[0]; 1134 break; 1135 1136 case SRE_OP_FAILURE: 1137 /* immediate failure */ 1138 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr)); 1139 RETURN_FAILURE; 1140 1141 default: 1142 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr, 1143 ctx->pattern[-1])); 1144 RETURN_ERROR(SRE_ERROR_ILLEGAL); 1145 } 1146 } 1147 1148exit: 1149 ctx_pos = ctx->last_ctx_pos; 1150 jump = ctx->jump; 1151 DATA_POP_DISCARD(ctx); 1152 if (ctx_pos == -1) 1153 return ret; 1154 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); 1155 1156 switch (jump) { 1157 case JUMP_MAX_UNTIL_2: 1158 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr)); 1159 goto jump_max_until_2; 1160 case JUMP_MAX_UNTIL_3: 1161 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr)); 1162 goto jump_max_until_3; 1163 case JUMP_MIN_UNTIL_2: 1164 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr)); 1165 goto jump_min_until_2; 1166 case JUMP_MIN_UNTIL_3: 1167 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr)); 1168 goto jump_min_until_3; 1169 case JUMP_BRANCH: 1170 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr)); 1171 goto jump_branch; 1172 case JUMP_MAX_UNTIL_1: 1173 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr)); 1174 goto jump_max_until_1; 1175 case JUMP_MIN_UNTIL_1: 1176 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr)); 1177 goto jump_min_until_1; 1178 case JUMP_REPEAT: 1179 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr)); 1180 goto jump_repeat; 1181 case JUMP_REPEAT_ONE_1: 1182 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr)); 1183 goto jump_repeat_one_1; 1184 case JUMP_REPEAT_ONE_2: 1185 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr)); 1186 goto jump_repeat_one_2; 1187 case JUMP_MIN_REPEAT_ONE: 1188 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr)); 1189 goto jump_min_repeat_one; 1190 case JUMP_ASSERT: 1191 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr)); 1192 goto jump_assert; 1193 case JUMP_ASSERT_NOT: 1194 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr)); 1195 goto jump_assert_not; 1196 case JUMP_NONE: 1197 TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern, 1198 ctx->ptr, ret)); 1199 break; 1200 } 1201 1202 return ret; /* should never get here */ 1203} 1204 1205LOCAL(Py_ssize_t) 1206SRE(search)(SRE_STATE* state, SRE_CODE* pattern) 1207{ 1208 SRE_CHAR* ptr = (SRE_CHAR *)state->start; 1209 SRE_CHAR* end = (SRE_CHAR *)state->end; 1210 Py_ssize_t status = 0; 1211 Py_ssize_t prefix_len = 0; 1212 Py_ssize_t prefix_skip = 0; 1213 SRE_CODE* prefix = NULL; 1214 SRE_CODE* charset = NULL; 1215 SRE_CODE* overlap = NULL; 1216 int flags = 0; 1217 1218 if (ptr > end) 1219 return 0; 1220 1221 if (pattern[0] == SRE_OP_INFO) { 1222 /* optimization info block */ 1223 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ 1224 1225 flags = pattern[2]; 1226 1227 if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) { 1228 TRACE(("reject (got %u chars, need %u)\n", 1229 (unsigned int)(end - ptr), pattern[3])); 1230 return 0; 1231 } 1232 if (pattern[3] > 1) { 1233 /* adjust end point (but make sure we leave at least one 1234 character in there, so literal search will work) */ 1235 end -= pattern[3] - 1; 1236 if (end <= ptr) 1237 end = ptr; 1238 } 1239 1240 if (flags & SRE_INFO_PREFIX) { 1241 /* pattern starts with a known prefix */ 1242 /* <length> <skip> <prefix data> <overlap data> */ 1243 prefix_len = pattern[5]; 1244 prefix_skip = pattern[6]; 1245 prefix = pattern + 7; 1246 overlap = prefix + prefix_len - 1; 1247 } else if (flags & SRE_INFO_CHARSET) 1248 /* pattern starts with a character from a known set */ 1249 /* <charset> */ 1250 charset = pattern + 5; 1251 1252 pattern += 1 + pattern[1]; 1253 } 1254 1255 TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n", 1256 prefix, prefix_len, prefix_skip)); 1257 TRACE(("charset = %p\n", charset)); 1258 1259#if defined(USE_FAST_SEARCH) 1260 if (prefix_len > 1) { 1261 /* pattern starts with a known prefix. use the overlap 1262 table to skip forward as fast as we possibly can */ 1263 Py_ssize_t i = 0; 1264 1265 end = (SRE_CHAR *)state->end; 1266 if (prefix_len > end - ptr) 1267 return 0; 1268#if SIZEOF_SRE_CHAR < 4 1269 for (i = 0; i < prefix_len; i++) 1270 if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) 1271 return 0; /* literal can't match: doesn't fit in char width */ 1272#endif 1273 while (ptr < end) { 1274 SRE_CHAR c = (SRE_CHAR) prefix[0]; 1275 while (*ptr++ != c) { 1276 if (ptr >= end) 1277 return 0; 1278 } 1279 if (ptr >= end) 1280 return 0; 1281 1282 i = 1; 1283 do { 1284 if (*ptr == (SRE_CHAR) prefix[i]) { 1285 if (++i != prefix_len) { 1286 if (++ptr >= end) 1287 return 0; 1288 continue; 1289 } 1290 /* found a potential match */ 1291 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); 1292 state->start = ptr - (prefix_len - 1); 1293 state->ptr = ptr - (prefix_len - prefix_skip - 1); 1294 if (flags & SRE_INFO_LITERAL) 1295 return 1; /* we got all of it */ 1296 status = SRE(match)(state, pattern + 2*prefix_skip, 0); 1297 if (status != 0) 1298 return status; 1299 /* close but no cigar -- try again */ 1300 if (++ptr >= end) 1301 return 0; 1302 } 1303 i = overlap[i]; 1304 } while (i != 0); 1305 } 1306 return 0; 1307 } 1308#endif 1309 1310 if (pattern[0] == SRE_OP_LITERAL) { 1311 /* pattern starts with a literal character. this is used 1312 for short prefixes, and if fast search is disabled */ 1313 SRE_CHAR c = (SRE_CHAR) pattern[1]; 1314#if SIZEOF_SRE_CHAR < 4 1315 if ((SRE_CODE) c != pattern[1]) 1316 return 0; /* literal can't match: doesn't fit in char width */ 1317#endif 1318 end = (SRE_CHAR *)state->end; 1319 while (ptr < end) { 1320 while (*ptr != c) { 1321 if (++ptr >= end) 1322 return 0; 1323 } 1324 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); 1325 state->start = ptr; 1326 state->ptr = ++ptr; 1327 if (flags & SRE_INFO_LITERAL) 1328 return 1; /* we got all of it */ 1329 status = SRE(match)(state, pattern + 2, 0); 1330 if (status != 0) 1331 break; 1332 } 1333 } else if (charset) { 1334 /* pattern starts with a character from a known set */ 1335 end = (SRE_CHAR *)state->end; 1336 for (;;) { 1337 while (ptr < end && !SRE(charset)(state, charset, *ptr)) 1338 ptr++; 1339 if (ptr >= end) 1340 return 0; 1341 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); 1342 state->start = ptr; 1343 state->ptr = ptr; 1344 status = SRE(match)(state, pattern, 0); 1345 if (status != 0) 1346 break; 1347 ptr++; 1348 } 1349 } else { 1350 /* general case */ 1351 assert(ptr <= end); 1352 while (1) { 1353 TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); 1354 state->start = state->ptr = ptr; 1355 status = SRE(match)(state, pattern, 0); 1356 if (status != 0 || ptr >= end) 1357 break; 1358 ptr++; 1359 } 1360 } 1361 1362 return status; 1363} 1364 1365#undef SRE_CHAR 1366#undef SIZEOF_SRE_CHAR 1367#undef SRE 1368 1369/* vim:ts=4:sw=4:et 1370*/ 1371