sre_lib.h revision 12b2538ab8e44d69d9ed2c8b329812130db9e6bc
1/* 2 * Secret Labs' Regular Expression Engine 3 * 4 * regular expression matching engine 5 * 6 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. 7 * 8 * See the _sre.c file for information on usage and redistribution. 9 */ 10 11/* String matching engine */ 12 13/* This file is included three times, with different character settings */ 14 15LOCAL(int) 16SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at) 17{ 18 /* check if pointer is at given position */ 19 20 Py_ssize_t thisp, thatp; 21 22 switch (at) { 23 24 case SRE_AT_BEGINNING: 25 case SRE_AT_BEGINNING_STRING: 26 return ((void*) ptr == state->beginning); 27 28 case SRE_AT_BEGINNING_LINE: 29 return ((void*) ptr == state->beginning || 30 SRE_IS_LINEBREAK((int) ptr[-1])); 31 32 case SRE_AT_END: 33 return (((SRE_CHAR *)state->end - ptr == 1 && 34 SRE_IS_LINEBREAK((int) ptr[0])) || 35 ((void*) ptr == state->end)); 36 37 case SRE_AT_END_LINE: 38 return ((void*) ptr == state->end || 39 SRE_IS_LINEBREAK((int) ptr[0])); 40 41 case SRE_AT_END_STRING: 42 return ((void*) ptr == state->end); 43 44 case SRE_AT_BOUNDARY: 45 if (state->beginning == state->end) 46 return 0; 47 thatp = ((void*) ptr > state->beginning) ? 48 SRE_IS_WORD((int) ptr[-1]) : 0; 49 thisp = ((void*) ptr < state->end) ? 50 SRE_IS_WORD((int) ptr[0]) : 0; 51 return thisp != thatp; 52 53 case SRE_AT_NON_BOUNDARY: 54 if (state->beginning == state->end) 55 return 0; 56 thatp = ((void*) ptr > state->beginning) ? 57 SRE_IS_WORD((int) ptr[-1]) : 0; 58 thisp = ((void*) ptr < state->end) ? 59 SRE_IS_WORD((int) ptr[0]) : 0; 60 return thisp == thatp; 61 62 case SRE_AT_LOC_BOUNDARY: 63 if (state->beginning == state->end) 64 return 0; 65 thatp = ((void*) ptr > state->beginning) ? 66 SRE_LOC_IS_WORD((int) ptr[-1]) : 0; 67 thisp = ((void*) ptr < state->end) ? 68 SRE_LOC_IS_WORD((int) ptr[0]) : 0; 69 return thisp != thatp; 70 71 case SRE_AT_LOC_NON_BOUNDARY: 72 if (state->beginning == state->end) 73 return 0; 74 thatp = ((void*) ptr > state->beginning) ? 75 SRE_LOC_IS_WORD((int) ptr[-1]) : 0; 76 thisp = ((void*) ptr < state->end) ? 77 SRE_LOC_IS_WORD((int) ptr[0]) : 0; 78 return thisp == thatp; 79 80 case SRE_AT_UNI_BOUNDARY: 81 if (state->beginning == state->end) 82 return 0; 83 thatp = ((void*) ptr > state->beginning) ? 84 SRE_UNI_IS_WORD((int) ptr[-1]) : 0; 85 thisp = ((void*) ptr < state->end) ? 86 SRE_UNI_IS_WORD((int) ptr[0]) : 0; 87 return thisp != thatp; 88 89 case SRE_AT_UNI_NON_BOUNDARY: 90 if (state->beginning == state->end) 91 return 0; 92 thatp = ((void*) ptr > state->beginning) ? 93 SRE_UNI_IS_WORD((int) ptr[-1]) : 0; 94 thisp = ((void*) ptr < state->end) ? 95 SRE_UNI_IS_WORD((int) ptr[0]) : 0; 96 return thisp == thatp; 97 98 } 99 100 return 0; 101} 102 103LOCAL(int) 104SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch) 105{ 106 /* check if character is a member of the given set */ 107 108 int ok = 1; 109 110 for (;;) { 111 switch (*set++) { 112 113 case SRE_OP_FAILURE: 114 return !ok; 115 116 case SRE_OP_LITERAL: 117 /* <LITERAL> <code> */ 118 if (ch == set[0]) 119 return ok; 120 set++; 121 break; 122 123 case SRE_OP_CATEGORY: 124 /* <CATEGORY> <code> */ 125 if (sre_category(set[0], (int) ch)) 126 return ok; 127 set++; 128 break; 129 130 case SRE_OP_CHARSET: 131 /* <CHARSET> <bitmap> */ 132 if (ch < 256 && 133 (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) 134 return ok; 135 set += 256/SRE_CODE_BITS; 136 break; 137 138 case SRE_OP_RANGE: 139 /* <RANGE> <lower> <upper> */ 140 if (set[0] <= ch && ch <= set[1]) 141 return ok; 142 set += 2; 143 break; 144 145 case SRE_OP_RANGE_IGNORE: 146 /* <RANGE_IGNORE> <lower> <upper> */ 147 { 148 SRE_CODE uch; 149 /* ch is already lower cased */ 150 if (set[0] <= ch && ch <= set[1]) 151 return ok; 152 uch = state->upper(ch); 153 if (set[0] <= uch && uch <= set[1]) 154 return ok; 155 set += 2; 156 break; 157 } 158 159 case SRE_OP_NEGATE: 160 ok = !ok; 161 break; 162 163 case SRE_OP_BIGCHARSET: 164 /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ 165 { 166 Py_ssize_t count, block; 167 count = *(set++); 168 169 if (ch < 0x10000u) 170 block = ((unsigned char*)set)[ch >> 8]; 171 else 172 block = -1; 173 set += 256/sizeof(SRE_CODE); 174 if (block >=0 && 175 (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & 176 (1u << (ch & (SRE_CODE_BITS-1))))) 177 return ok; 178 set += count * (256/SRE_CODE_BITS); 179 break; 180 } 181 182 default: 183 /* internal error -- there's not much we can do about it 184 here, so let's just pretend it didn't match... */ 185 return 0; 186 } 187 } 188} 189 190LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all); 191 192LOCAL(Py_ssize_t) 193SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount) 194{ 195 SRE_CODE chr; 196 SRE_CHAR c; 197 SRE_CHAR* ptr = (SRE_CHAR *)state->ptr; 198 SRE_CHAR* end = (SRE_CHAR *)state->end; 199 Py_ssize_t i; 200 201 /* adjust end */ 202 if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) 203 end = ptr + maxcount; 204 205 switch (pattern[0]) { 206 207 case SRE_OP_IN: 208 /* repeated set */ 209 TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); 210 while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) 211 ptr++; 212 break; 213 214 case SRE_OP_ANY: 215 /* repeated dot wildcard. */ 216 TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); 217 while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) 218 ptr++; 219 break; 220 221 case SRE_OP_ANY_ALL: 222 /* repeated dot wildcard. skip to the end of the target 223 string, and backtrack from there */ 224 TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); 225 ptr = end; 226 break; 227 228 case SRE_OP_LITERAL: 229 /* repeated literal */ 230 chr = pattern[1]; 231 TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); 232 c = (SRE_CHAR) chr; 233#if SIZEOF_SRE_CHAR < 4 234 if ((SRE_CODE) c != chr) 235 ; /* literal can't match: doesn't fit in char width */ 236 else 237#endif 238 while (ptr < end && *ptr == c) 239 ptr++; 240 break; 241 242 case SRE_OP_LITERAL_IGNORE: 243 /* repeated literal */ 244 chr = pattern[1]; 245 TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); 246 while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr) 247 ptr++; 248 break; 249 250 case SRE_OP_NOT_LITERAL: 251 /* repeated non-literal */ 252 chr = pattern[1]; 253 TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); 254 c = (SRE_CHAR) chr; 255#if SIZEOF_SRE_CHAR < 4 256 if ((SRE_CODE) c != chr) 257 ptr = end; /* literal can't match: doesn't fit in char width */ 258 else 259#endif 260 while (ptr < end && *ptr != c) 261 ptr++; 262 break; 263 264 case SRE_OP_NOT_LITERAL_IGNORE: 265 /* repeated non-literal */ 266 chr = pattern[1]; 267 TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr)); 268 while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr) 269 ptr++; 270 break; 271 272 default: 273 /* repeated single character pattern */ 274 TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); 275 while ((SRE_CHAR*) state->ptr < end) { 276 i = SRE(match)(state, pattern, 0); 277 if (i < 0) 278 return i; 279 if (!i) 280 break; 281 } 282 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr, 283 (SRE_CHAR*) state->ptr - ptr)); 284 return (SRE_CHAR*) state->ptr - ptr; 285 } 286 287 TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr, 288 ptr - (SRE_CHAR*) state->ptr)); 289 return ptr - (SRE_CHAR*) state->ptr; 290} 291 292#if 0 /* not used in this release */ 293LOCAL(int) 294SRE(info)(SRE_STATE* state, SRE_CODE* pattern) 295{ 296 /* check if an SRE_OP_INFO block matches at the current position. 297 returns the number of SRE_CODE objects to skip if successful, 0 298 if no match */ 299 300 SRE_CHAR* end = (SRE_CHAR*) state->end; 301 SRE_CHAR* ptr = (SRE_CHAR*) state->ptr; 302 Py_ssize_t i; 303 304 /* check minimal length */ 305 if (pattern[3] && end - ptr < pattern[3]) 306 return 0; 307 308 /* check known prefix */ 309 if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) { 310 /* <length> <skip> <prefix data> <overlap data> */ 311 for (i = 0; i < pattern[5]; i++) 312 if ((SRE_CODE) ptr[i] != pattern[7 + i]) 313 return 0; 314 return pattern[0] + 2 * pattern[6]; 315 } 316 return pattern[0]; 317} 318#endif 319 320/* The macros below should be used to protect recursive SRE(match)() 321 * calls that *failed* and do *not* return immediately (IOW, those 322 * that will backtrack). Explaining: 323 * 324 * - Recursive SRE(match)() returned true: that's usually a success 325 * (besides atypical cases like ASSERT_NOT), therefore there's no 326 * reason to restore lastmark; 327 * 328 * - Recursive SRE(match)() returned false but the current SRE(match)() 329 * is returning to the caller: If the current SRE(match)() is the 330 * top function of the recursion, returning false will be a matching 331 * failure, and it doesn't matter where lastmark is pointing to. 332 * If it's *not* the top function, it will be a recursive SRE(match)() 333 * failure by itself, and the calling SRE(match)() will have to deal 334 * with the failure by the same rules explained here (it will restore 335 * lastmark by itself if necessary); 336 * 337 * - Recursive SRE(match)() returned false, and will continue the 338 * outside 'for' loop: must be protected when breaking, since the next 339 * OP could potentially depend on lastmark; 340 * 341 * - Recursive SRE(match)() returned false, and will be called again 342 * inside a local for/while loop: must be protected between each 343 * loop iteration, since the recursive SRE(match)() could do anything, 344 * and could potentially depend on lastmark. 345 * 346 * For more information, check the discussion at SF patch #712900. 347 */ 348#define LASTMARK_SAVE() \ 349 do { \ 350 ctx->lastmark = state->lastmark; \ 351 ctx->lastindex = state->lastindex; \ 352 } while (0) 353#define LASTMARK_RESTORE() \ 354 do { \ 355 state->lastmark = ctx->lastmark; \ 356 state->lastindex = ctx->lastindex; \ 357 } while (0) 358 359#define RETURN_ERROR(i) do { return i; } while(0) 360#define RETURN_FAILURE do { ret = 0; goto exit; } while(0) 361#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0) 362 363#define RETURN_ON_ERROR(i) \ 364 do { if (i < 0) RETURN_ERROR(i); } while (0) 365#define RETURN_ON_SUCCESS(i) \ 366 do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0) 367#define RETURN_ON_FAILURE(i) \ 368 do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0) 369 370#define DATA_STACK_ALLOC(state, type, ptr) \ 371do { \ 372 alloc_pos = state->data_stack_base; \ 373 TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \ 374 "(%" PY_FORMAT_SIZE_T "d)\n", \ 375 Py_STRINGIFY(type), alloc_pos, sizeof(type))); \ 376 if (sizeof(type) > state->data_stack_size - alloc_pos) { \ 377 int j = data_stack_grow(state, sizeof(type)); \ 378 if (j < 0) return j; \ 379 if (ctx_pos != -1) \ 380 DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \ 381 } \ 382 ptr = (type*)(state->data_stack+alloc_pos); \ 383 state->data_stack_base += sizeof(type); \ 384} while (0) 385 386#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \ 387do { \ 388 TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", Py_STRINGIFY(type), pos)); \ 389 ptr = (type*)(state->data_stack+pos); \ 390} while (0) 391 392#define DATA_STACK_PUSH(state, data, size) \ 393do { \ 394 TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \ 395 "(%" PY_FORMAT_SIZE_T "d)\n", \ 396 data, state->data_stack_base, size)); \ 397 if (size > state->data_stack_size - state->data_stack_base) { \ 398 int j = data_stack_grow(state, size); \ 399 if (j < 0) return j; \ 400 if (ctx_pos != -1) \ 401 DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \ 402 } \ 403 memcpy(state->data_stack+state->data_stack_base, data, size); \ 404 state->data_stack_base += size; \ 405} while (0) 406 407#define DATA_STACK_POP(state, data, size, discard) \ 408do { \ 409 TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \ 410 "(%" PY_FORMAT_SIZE_T "d)\n", \ 411 data, state->data_stack_base-size, size)); \ 412 memcpy(data, state->data_stack+state->data_stack_base-size, size); \ 413 if (discard) \ 414 state->data_stack_base -= size; \ 415} while (0) 416 417#define DATA_STACK_POP_DISCARD(state, size) \ 418do { \ 419 TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \ 420 "(%" PY_FORMAT_SIZE_T "d)\n", \ 421 state->data_stack_base-size, size)); \ 422 state->data_stack_base -= size; \ 423} while(0) 424 425#define DATA_PUSH(x) \ 426 DATA_STACK_PUSH(state, (x), sizeof(*(x))) 427#define DATA_POP(x) \ 428 DATA_STACK_POP(state, (x), sizeof(*(x)), 1) 429#define DATA_POP_DISCARD(x) \ 430 DATA_STACK_POP_DISCARD(state, sizeof(*(x))) 431#define DATA_ALLOC(t,p) \ 432 DATA_STACK_ALLOC(state, t, p) 433#define DATA_LOOKUP_AT(t,p,pos) \ 434 DATA_STACK_LOOKUP_AT(state,t,p,pos) 435 436#define MARK_PUSH(lastmark) \ 437 do if (lastmark > 0) { \ 438 i = lastmark; /* ctx->lastmark may change if reallocated */ \ 439 DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \ 440 } while (0) 441#define MARK_POP(lastmark) \ 442 do if (lastmark > 0) { \ 443 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \ 444 } while (0) 445#define MARK_POP_KEEP(lastmark) \ 446 do if (lastmark > 0) { \ 447 DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \ 448 } while (0) 449#define MARK_POP_DISCARD(lastmark) \ 450 do if (lastmark > 0) { \ 451 DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \ 452 } while (0) 453 454#define JUMP_NONE 0 455#define JUMP_MAX_UNTIL_1 1 456#define JUMP_MAX_UNTIL_2 2 457#define JUMP_MAX_UNTIL_3 3 458#define JUMP_MIN_UNTIL_1 4 459#define JUMP_MIN_UNTIL_2 5 460#define JUMP_MIN_UNTIL_3 6 461#define JUMP_REPEAT 7 462#define JUMP_REPEAT_ONE_1 8 463#define JUMP_REPEAT_ONE_2 9 464#define JUMP_MIN_REPEAT_ONE 10 465#define JUMP_BRANCH 11 466#define JUMP_ASSERT 12 467#define JUMP_ASSERT_NOT 13 468 469#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, matchall) \ 470 DATA_ALLOC(SRE(match_context), nextctx); \ 471 nextctx->last_ctx_pos = ctx_pos; \ 472 nextctx->jump = jumpvalue; \ 473 nextctx->pattern = nextpattern; \ 474 nextctx->match_all = matchall; \ 475 ctx_pos = alloc_pos; \ 476 ctx = nextctx; \ 477 goto entrance; \ 478 jumplabel: \ 479 while (0) /* gcc doesn't like labels at end of scopes */ \ 480 481#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \ 482 DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->match_all) 483 484#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \ 485 DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0) 486 487typedef struct { 488 Py_ssize_t last_ctx_pos; 489 Py_ssize_t jump; 490 SRE_CHAR* ptr; 491 SRE_CODE* pattern; 492 Py_ssize_t count; 493 Py_ssize_t lastmark; 494 Py_ssize_t lastindex; 495 union { 496 SRE_CODE chr; 497 SRE_REPEAT* rep; 498 } u; 499 int match_all; 500} SRE(match_context); 501 502/* check if string matches the given pattern. returns <0 for 503 error, 0 for failure, and 1 for success */ 504LOCAL(Py_ssize_t) 505SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all) 506{ 507 SRE_CHAR* end = (SRE_CHAR *)state->end; 508 Py_ssize_t alloc_pos, ctx_pos = -1; 509 Py_ssize_t i, ret = 0; 510 Py_ssize_t jump; 511 unsigned int sigcount=0; 512 513 SRE(match_context)* ctx; 514 SRE(match_context)* nextctx; 515 516 TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); 517 518 DATA_ALLOC(SRE(match_context), ctx); 519 ctx->last_ctx_pos = -1; 520 ctx->jump = JUMP_NONE; 521 ctx->pattern = pattern; 522 ctx->match_all = match_all; 523 ctx_pos = alloc_pos; 524 525entrance: 526 527 ctx->ptr = (SRE_CHAR *)state->ptr; 528 529 if (ctx->pattern[0] == SRE_OP_INFO) { 530 /* optimization info block */ 531 /* <INFO> <1=skip> <2=flags> <3=min> ... */ 532 if (ctx->pattern[3] && (Py_uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) { 533 TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, " 534 "need %" PY_FORMAT_SIZE_T "d)\n", 535 end - ctx->ptr, (Py_ssize_t) ctx->pattern[3])); 536 RETURN_FAILURE; 537 } 538 ctx->pattern += ctx->pattern[1] + 1; 539 } 540 541 for (;;) { 542 ++sigcount; 543 if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals()) 544 RETURN_ERROR(SRE_ERROR_INTERRUPTED); 545 546 switch (*ctx->pattern++) { 547 548 case SRE_OP_MARK: 549 /* set mark */ 550 /* <MARK> <gid> */ 551 TRACE(("|%p|%p|MARK %d\n", ctx->pattern, 552 ctx->ptr, ctx->pattern[0])); 553 i = ctx->pattern[0]; 554 if (i & 1) 555 state->lastindex = i/2 + 1; 556 if (i > state->lastmark) { 557 /* state->lastmark is the highest valid index in the 558 state->mark array. If it is increased by more than 1, 559 the intervening marks must be set to NULL to signal 560 that these marks have not been encountered. */ 561 Py_ssize_t j = state->lastmark + 1; 562 while (j < i) 563 state->mark[j++] = NULL; 564 state->lastmark = i; 565 } 566 state->mark[i] = ctx->ptr; 567 ctx->pattern++; 568 break; 569 570 case SRE_OP_LITERAL: 571 /* match literal string */ 572 /* <LITERAL> <code> */ 573 TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern, 574 ctx->ptr, *ctx->pattern)); 575 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0]) 576 RETURN_FAILURE; 577 ctx->pattern++; 578 ctx->ptr++; 579 break; 580 581 case SRE_OP_NOT_LITERAL: 582 /* match anything that is not literal character */ 583 /* <NOT_LITERAL> <code> */ 584 TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern, 585 ctx->ptr, *ctx->pattern)); 586 if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0]) 587 RETURN_FAILURE; 588 ctx->pattern++; 589 ctx->ptr++; 590 break; 591 592 case SRE_OP_SUCCESS: 593 /* end of pattern */ 594 TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr)); 595 if (!ctx->match_all || ctx->ptr == state->end) { 596 state->ptr = ctx->ptr; 597 RETURN_SUCCESS; 598 } 599 RETURN_FAILURE; 600 601 case SRE_OP_AT: 602 /* match at given position */ 603 /* <AT> <code> */ 604 TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern)); 605 if (!SRE(at)(state, ctx->ptr, *ctx->pattern)) 606 RETURN_FAILURE; 607 ctx->pattern++; 608 break; 609 610 case SRE_OP_CATEGORY: 611 /* match at given category */ 612 /* <CATEGORY> <code> */ 613 TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern, 614 ctx->ptr, *ctx->pattern)); 615 if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0])) 616 RETURN_FAILURE; 617 ctx->pattern++; 618 ctx->ptr++; 619 break; 620 621 case SRE_OP_ANY: 622 /* match anything (except a newline) */ 623 /* <ANY> */ 624 TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr)); 625 if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0])) 626 RETURN_FAILURE; 627 ctx->ptr++; 628 break; 629 630 case SRE_OP_ANY_ALL: 631 /* match anything */ 632 /* <ANY_ALL> */ 633 TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr)); 634 if (ctx->ptr >= end) 635 RETURN_FAILURE; 636 ctx->ptr++; 637 break; 638 639 case SRE_OP_IN: 640 /* match set member (or non_member) */ 641 /* <IN> <skip> <set> */ 642 TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr)); 643 if (ctx->ptr >= end || 644 !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr)) 645 RETURN_FAILURE; 646 ctx->pattern += ctx->pattern[0]; 647 ctx->ptr++; 648 break; 649 650 case SRE_OP_LITERAL_IGNORE: 651 TRACE(("|%p|%p|LITERAL_IGNORE %d\n", 652 ctx->pattern, ctx->ptr, ctx->pattern[0])); 653 if (ctx->ptr >= end || 654 state->lower(*ctx->ptr) != state->lower(*ctx->pattern)) 655 RETURN_FAILURE; 656 ctx->pattern++; 657 ctx->ptr++; 658 break; 659 660 case SRE_OP_NOT_LITERAL_IGNORE: 661 TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", 662 ctx->pattern, ctx->ptr, *ctx->pattern)); 663 if (ctx->ptr >= end || 664 state->lower(*ctx->ptr) == state->lower(*ctx->pattern)) 665 RETURN_FAILURE; 666 ctx->pattern++; 667 ctx->ptr++; 668 break; 669 670 case SRE_OP_IN_IGNORE: 671 TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr)); 672 if (ctx->ptr >= end 673 || !SRE(charset)(state, ctx->pattern+1, 674 (SRE_CODE)state->lower(*ctx->ptr))) 675 RETURN_FAILURE; 676 ctx->pattern += ctx->pattern[0]; 677 ctx->ptr++; 678 break; 679 680 case SRE_OP_JUMP: 681 case SRE_OP_INFO: 682 /* jump forward */ 683 /* <JUMP> <offset> */ 684 TRACE(("|%p|%p|JUMP %d\n", ctx->pattern, 685 ctx->ptr, ctx->pattern[0])); 686 ctx->pattern += ctx->pattern[0]; 687 break; 688 689 case SRE_OP_BRANCH: 690 /* alternation */ 691 /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ 692 TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr)); 693 LASTMARK_SAVE(); 694 ctx->u.rep = state->repeat; 695 if (ctx->u.rep) 696 MARK_PUSH(ctx->lastmark); 697 for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) { 698 if (ctx->pattern[1] == SRE_OP_LITERAL && 699 (ctx->ptr >= end || 700 (SRE_CODE) *ctx->ptr != ctx->pattern[2])) 701 continue; 702 if (ctx->pattern[1] == SRE_OP_IN && 703 (ctx->ptr >= end || 704 !SRE(charset)(state, ctx->pattern + 3, 705 (SRE_CODE) *ctx->ptr))) 706 continue; 707 state->ptr = ctx->ptr; 708 DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1); 709 if (ret) { 710 if (ctx->u.rep) 711 MARK_POP_DISCARD(ctx->lastmark); 712 RETURN_ON_ERROR(ret); 713 RETURN_SUCCESS; 714 } 715 if (ctx->u.rep) 716 MARK_POP_KEEP(ctx->lastmark); 717 LASTMARK_RESTORE(); 718 } 719 if (ctx->u.rep) 720 MARK_POP_DISCARD(ctx->lastmark); 721 RETURN_FAILURE; 722 723 case SRE_OP_REPEAT_ONE: 724 /* match repeated sequence (maximizing regexp) */ 725 726 /* this operator only works if the repeated item is 727 exactly one character wide, and we're not already 728 collecting backtracking points. for other cases, 729 use the MAX_REPEAT operator */ 730 731 /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ 732 733 TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, 734 ctx->pattern[1], ctx->pattern[2])); 735 736 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) 737 RETURN_FAILURE; /* cannot match */ 738 739 state->ptr = ctx->ptr; 740 741 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]); 742 RETURN_ON_ERROR(ret); 743 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); 744 ctx->count = ret; 745 ctx->ptr += ctx->count; 746 747 /* when we arrive here, count contains the number of 748 matches, and ctx->ptr points to the tail of the target 749 string. check if the rest of the pattern matches, 750 and backtrack if not. */ 751 752 if (ctx->count < (Py_ssize_t) ctx->pattern[1]) 753 RETURN_FAILURE; 754 755 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && 756 ctx->ptr == state->end) { 757 /* tail is empty. we're finished */ 758 state->ptr = ctx->ptr; 759 RETURN_SUCCESS; 760 } 761 762 LASTMARK_SAVE(); 763 764 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) { 765 /* tail starts with a literal. skip positions where 766 the rest of the pattern cannot possibly match */ 767 ctx->u.chr = ctx->pattern[ctx->pattern[0]+1]; 768 for (;;) { 769 while (ctx->count >= (Py_ssize_t) ctx->pattern[1] && 770 (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) { 771 ctx->ptr--; 772 ctx->count--; 773 } 774 if (ctx->count < (Py_ssize_t) ctx->pattern[1]) 775 break; 776 state->ptr = ctx->ptr; 777 DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, 778 ctx->pattern+ctx->pattern[0]); 779 if (ret) { 780 RETURN_ON_ERROR(ret); 781 RETURN_SUCCESS; 782 } 783 784 LASTMARK_RESTORE(); 785 786 ctx->ptr--; 787 ctx->count--; 788 } 789 790 } else { 791 /* general case */ 792 while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) { 793 state->ptr = ctx->ptr; 794 DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, 795 ctx->pattern+ctx->pattern[0]); 796 if (ret) { 797 RETURN_ON_ERROR(ret); 798 RETURN_SUCCESS; 799 } 800 ctx->ptr--; 801 ctx->count--; 802 LASTMARK_RESTORE(); 803 } 804 } 805 RETURN_FAILURE; 806 807 case SRE_OP_MIN_REPEAT_ONE: 808 /* match repeated sequence (minimizing regexp) */ 809 810 /* this operator only works if the repeated item is 811 exactly one character wide, and we're not already 812 collecting backtracking points. for other cases, 813 use the MIN_REPEAT operator */ 814 815 /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ 816 817 TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr, 818 ctx->pattern[1], ctx->pattern[2])); 819 820 if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr) 821 RETURN_FAILURE; /* cannot match */ 822 823 state->ptr = ctx->ptr; 824 825 if (ctx->pattern[1] == 0) 826 ctx->count = 0; 827 else { 828 /* count using pattern min as the maximum */ 829 ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]); 830 RETURN_ON_ERROR(ret); 831 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); 832 if (ret < (Py_ssize_t) ctx->pattern[1]) 833 /* didn't match minimum number of times */ 834 RETURN_FAILURE; 835 /* advance past minimum matches of repeat */ 836 ctx->count = ret; 837 ctx->ptr += ctx->count; 838 } 839 840 if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS && 841 (!match_all || ctx->ptr == state->end)) { 842 /* tail is empty. we're finished */ 843 state->ptr = ctx->ptr; 844 RETURN_SUCCESS; 845 846 } else { 847 /* general case */ 848 LASTMARK_SAVE(); 849 while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT 850 || ctx->count <= (Py_ssize_t)ctx->pattern[2]) { 851 state->ptr = ctx->ptr; 852 DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, 853 ctx->pattern+ctx->pattern[0]); 854 if (ret) { 855 RETURN_ON_ERROR(ret); 856 RETURN_SUCCESS; 857 } 858 state->ptr = ctx->ptr; 859 ret = SRE(count)(state, ctx->pattern+3, 1); 860 RETURN_ON_ERROR(ret); 861 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); 862 if (ret == 0) 863 break; 864 assert(ret == 1); 865 ctx->ptr++; 866 ctx->count++; 867 LASTMARK_RESTORE(); 868 } 869 } 870 RETURN_FAILURE; 871 872 case SRE_OP_REPEAT: 873 /* create repeat context. all the hard work is done 874 by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ 875 /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */ 876 TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr, 877 ctx->pattern[1], ctx->pattern[2])); 878 879 /* install new repeat context */ 880 ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep)); 881 if (!ctx->u.rep) { 882 PyErr_NoMemory(); 883 RETURN_FAILURE; 884 } 885 ctx->u.rep->count = -1; 886 ctx->u.rep->pattern = ctx->pattern; 887 ctx->u.rep->prev = state->repeat; 888 ctx->u.rep->last_ptr = NULL; 889 state->repeat = ctx->u.rep; 890 891 state->ptr = ctx->ptr; 892 DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]); 893 state->repeat = ctx->u.rep->prev; 894 PyObject_FREE(ctx->u.rep); 895 896 if (ret) { 897 RETURN_ON_ERROR(ret); 898 RETURN_SUCCESS; 899 } 900 RETURN_FAILURE; 901 902 case SRE_OP_MAX_UNTIL: 903 /* maximizing repeat */ 904 /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ 905 906 /* FIXME: we probably need to deal with zero-width 907 matches in here... */ 908 909 ctx->u.rep = state->repeat; 910 if (!ctx->u.rep) 911 RETURN_ERROR(SRE_ERROR_STATE); 912 913 state->ptr = ctx->ptr; 914 915 ctx->count = ctx->u.rep->count+1; 916 917 TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern, 918 ctx->ptr, ctx->count)); 919 920 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { 921 /* not enough matches */ 922 ctx->u.rep->count = ctx->count; 923 DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, 924 ctx->u.rep->pattern+3); 925 if (ret) { 926 RETURN_ON_ERROR(ret); 927 RETURN_SUCCESS; 928 } 929 ctx->u.rep->count = ctx->count-1; 930 state->ptr = ctx->ptr; 931 RETURN_FAILURE; 932 } 933 934 if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || 935 ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && 936 state->ptr != ctx->u.rep->last_ptr) { 937 /* we may have enough matches, but if we can 938 match another item, do so */ 939 ctx->u.rep->count = ctx->count; 940 LASTMARK_SAVE(); 941 MARK_PUSH(ctx->lastmark); 942 /* zero-width match protection */ 943 DATA_PUSH(&ctx->u.rep->last_ptr); 944 ctx->u.rep->last_ptr = state->ptr; 945 DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, 946 ctx->u.rep->pattern+3); 947 DATA_POP(&ctx->u.rep->last_ptr); 948 if (ret) { 949 MARK_POP_DISCARD(ctx->lastmark); 950 RETURN_ON_ERROR(ret); 951 RETURN_SUCCESS; 952 } 953 MARK_POP(ctx->lastmark); 954 LASTMARK_RESTORE(); 955 ctx->u.rep->count = ctx->count-1; 956 state->ptr = ctx->ptr; 957 } 958 959 /* cannot match more repeated items here. make sure the 960 tail matches */ 961 state->repeat = ctx->u.rep->prev; 962 DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern); 963 RETURN_ON_SUCCESS(ret); 964 state->repeat = ctx->u.rep; 965 state->ptr = ctx->ptr; 966 RETURN_FAILURE; 967 968 case SRE_OP_MIN_UNTIL: 969 /* minimizing repeat */ 970 /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ 971 972 ctx->u.rep = state->repeat; 973 if (!ctx->u.rep) 974 RETURN_ERROR(SRE_ERROR_STATE); 975 976 state->ptr = ctx->ptr; 977 978 ctx->count = ctx->u.rep->count+1; 979 980 TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern, 981 ctx->ptr, ctx->count, ctx->u.rep->pattern)); 982 983 if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { 984 /* not enough matches */ 985 ctx->u.rep->count = ctx->count; 986 DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, 987 ctx->u.rep->pattern+3); 988 if (ret) { 989 RETURN_ON_ERROR(ret); 990 RETURN_SUCCESS; 991 } 992 ctx->u.rep->count = ctx->count-1; 993 state->ptr = ctx->ptr; 994 RETURN_FAILURE; 995 } 996 997 LASTMARK_SAVE(); 998 999 /* see if the tail matches */ 1000 state->repeat = ctx->u.rep->prev; 1001 DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern); 1002 if (ret) { 1003 RETURN_ON_ERROR(ret); 1004 RETURN_SUCCESS; 1005 } 1006 1007 state->repeat = ctx->u.rep; 1008 state->ptr = ctx->ptr; 1009 1010 LASTMARK_RESTORE(); 1011 1012 if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] 1013 && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || 1014 state->ptr == ctx->u.rep->last_ptr) 1015 RETURN_FAILURE; 1016 1017 ctx->u.rep->count = ctx->count; 1018 /* zero-width match protection */ 1019 DATA_PUSH(&ctx->u.rep->last_ptr); 1020 ctx->u.rep->last_ptr = state->ptr; 1021 DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, 1022 ctx->u.rep->pattern+3); 1023 DATA_POP(&ctx->u.rep->last_ptr); 1024 if (ret) { 1025 RETURN_ON_ERROR(ret); 1026 RETURN_SUCCESS; 1027 } 1028 ctx->u.rep->count = ctx->count-1; 1029 state->ptr = ctx->ptr; 1030 RETURN_FAILURE; 1031 1032 case SRE_OP_GROUPREF: 1033 /* match backreference */ 1034 TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern, 1035 ctx->ptr, ctx->pattern[0])); 1036 i = ctx->pattern[0]; 1037 { 1038 Py_ssize_t groupref = i+i; 1039 if (groupref >= state->lastmark) { 1040 RETURN_FAILURE; 1041 } else { 1042 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; 1043 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; 1044 if (!p || !e || e < p) 1045 RETURN_FAILURE; 1046 while (p < e) { 1047 if (ctx->ptr >= end || *ctx->ptr != *p) 1048 RETURN_FAILURE; 1049 p++; 1050 ctx->ptr++; 1051 } 1052 } 1053 } 1054 ctx->pattern++; 1055 break; 1056 1057 case SRE_OP_GROUPREF_IGNORE: 1058 /* match backreference */ 1059 TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern, 1060 ctx->ptr, ctx->pattern[0])); 1061 i = ctx->pattern[0]; 1062 { 1063 Py_ssize_t groupref = i+i; 1064 if (groupref >= state->lastmark) { 1065 RETURN_FAILURE; 1066 } else { 1067 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; 1068 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; 1069 if (!p || !e || e < p) 1070 RETURN_FAILURE; 1071 while (p < e) { 1072 if (ctx->ptr >= end || 1073 state->lower(*ctx->ptr) != state->lower(*p)) 1074 RETURN_FAILURE; 1075 p++; 1076 ctx->ptr++; 1077 } 1078 } 1079 } 1080 ctx->pattern++; 1081 break; 1082 1083 case SRE_OP_GROUPREF_EXISTS: 1084 TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern, 1085 ctx->ptr, ctx->pattern[0])); 1086 /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */ 1087 i = ctx->pattern[0]; 1088 { 1089 Py_ssize_t groupref = i+i; 1090 if (groupref >= state->lastmark) { 1091 ctx->pattern += ctx->pattern[1]; 1092 break; 1093 } else { 1094 SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; 1095 SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; 1096 if (!p || !e || e < p) { 1097 ctx->pattern += ctx->pattern[1]; 1098 break; 1099 } 1100 } 1101 } 1102 ctx->pattern += 2; 1103 break; 1104 1105 case SRE_OP_ASSERT: 1106 /* assert subpattern */ 1107 /* <ASSERT> <skip> <back> <pattern> */ 1108 TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern, 1109 ctx->ptr, ctx->pattern[1])); 1110 if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1]) 1111 RETURN_FAILURE; 1112 state->ptr = ctx->ptr - ctx->pattern[1]; 1113 DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2); 1114 RETURN_ON_FAILURE(ret); 1115 ctx->pattern += ctx->pattern[0]; 1116 break; 1117 1118 case SRE_OP_ASSERT_NOT: 1119 /* assert not subpattern */ 1120 /* <ASSERT_NOT> <skip> <back> <pattern> */ 1121 TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern, 1122 ctx->ptr, ctx->pattern[1])); 1123 if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) { 1124 state->ptr = ctx->ptr - ctx->pattern[1]; 1125 DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2); 1126 if (ret) { 1127 RETURN_ON_ERROR(ret); 1128 RETURN_FAILURE; 1129 } 1130 } 1131 ctx->pattern += ctx->pattern[0]; 1132 break; 1133 1134 case SRE_OP_FAILURE: 1135 /* immediate failure */ 1136 TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr)); 1137 RETURN_FAILURE; 1138 1139 default: 1140 TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr, 1141 ctx->pattern[-1])); 1142 RETURN_ERROR(SRE_ERROR_ILLEGAL); 1143 } 1144 } 1145 1146exit: 1147 ctx_pos = ctx->last_ctx_pos; 1148 jump = ctx->jump; 1149 DATA_POP_DISCARD(ctx); 1150 if (ctx_pos == -1) 1151 return ret; 1152 DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); 1153 1154 switch (jump) { 1155 case JUMP_MAX_UNTIL_2: 1156 TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr)); 1157 goto jump_max_until_2; 1158 case JUMP_MAX_UNTIL_3: 1159 TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr)); 1160 goto jump_max_until_3; 1161 case JUMP_MIN_UNTIL_2: 1162 TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr)); 1163 goto jump_min_until_2; 1164 case JUMP_MIN_UNTIL_3: 1165 TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr)); 1166 goto jump_min_until_3; 1167 case JUMP_BRANCH: 1168 TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr)); 1169 goto jump_branch; 1170 case JUMP_MAX_UNTIL_1: 1171 TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr)); 1172 goto jump_max_until_1; 1173 case JUMP_MIN_UNTIL_1: 1174 TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr)); 1175 goto jump_min_until_1; 1176 case JUMP_REPEAT: 1177 TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr)); 1178 goto jump_repeat; 1179 case JUMP_REPEAT_ONE_1: 1180 TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr)); 1181 goto jump_repeat_one_1; 1182 case JUMP_REPEAT_ONE_2: 1183 TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr)); 1184 goto jump_repeat_one_2; 1185 case JUMP_MIN_REPEAT_ONE: 1186 TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr)); 1187 goto jump_min_repeat_one; 1188 case JUMP_ASSERT: 1189 TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr)); 1190 goto jump_assert; 1191 case JUMP_ASSERT_NOT: 1192 TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr)); 1193 goto jump_assert_not; 1194 case JUMP_NONE: 1195 TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern, 1196 ctx->ptr, ret)); 1197 break; 1198 } 1199 1200 return ret; /* should never get here */ 1201} 1202 1203LOCAL(Py_ssize_t) 1204SRE(search)(SRE_STATE* state, SRE_CODE* pattern) 1205{ 1206 SRE_CHAR* ptr = (SRE_CHAR *)state->start; 1207 SRE_CHAR* end = (SRE_CHAR *)state->end; 1208 Py_ssize_t status = 0; 1209 Py_ssize_t prefix_len = 0; 1210 Py_ssize_t prefix_skip = 0; 1211 SRE_CODE* prefix = NULL; 1212 SRE_CODE* charset = NULL; 1213 SRE_CODE* overlap = NULL; 1214 int flags = 0; 1215 1216 if (ptr > end) 1217 return 0; 1218 1219 if (pattern[0] == SRE_OP_INFO) { 1220 /* optimization info block */ 1221 /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */ 1222 1223 flags = pattern[2]; 1224 1225 if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) { 1226 TRACE(("reject (got %u chars, need %u)\n", 1227 (unsigned int)(end - ptr), pattern[3])); 1228 return 0; 1229 } 1230 if (pattern[3] > 1) { 1231 /* adjust end point (but make sure we leave at least one 1232 character in there, so literal search will work) */ 1233 end -= pattern[3] - 1; 1234 if (end <= ptr) 1235 end = ptr; 1236 } 1237 1238 if (flags & SRE_INFO_PREFIX) { 1239 /* pattern starts with a known prefix */ 1240 /* <length> <skip> <prefix data> <overlap data> */ 1241 prefix_len = pattern[5]; 1242 prefix_skip = pattern[6]; 1243 prefix = pattern + 7; 1244 overlap = prefix + prefix_len - 1; 1245 } else if (flags & SRE_INFO_CHARSET) 1246 /* pattern starts with a character from a known set */ 1247 /* <charset> */ 1248 charset = pattern + 5; 1249 1250 pattern += 1 + pattern[1]; 1251 } 1252 1253 TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n", 1254 prefix, prefix_len, prefix_skip)); 1255 TRACE(("charset = %p\n", charset)); 1256 1257 if (prefix_len == 1) { 1258 /* pattern starts with a literal character */ 1259 SRE_CHAR c = (SRE_CHAR) prefix[0]; 1260#if SIZEOF_SRE_CHAR < 4 1261 if ((SRE_CODE) c != prefix[0]) 1262 return 0; /* literal can't match: doesn't fit in char width */ 1263#endif 1264 end = (SRE_CHAR *)state->end; 1265 while (ptr < end) { 1266 while (*ptr != c) { 1267 if (++ptr >= end) 1268 return 0; 1269 } 1270 TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); 1271 state->start = ptr; 1272 state->ptr = ptr + prefix_skip; 1273 if (flags & SRE_INFO_LITERAL) 1274 return 1; /* we got all of it */ 1275 status = SRE(match)(state, pattern + 2*prefix_skip, 0); 1276 if (status != 0) 1277 return status; 1278 ++ptr; 1279 } 1280 return 0; 1281 } 1282 1283 if (prefix_len > 1) { 1284 /* pattern starts with a known prefix. use the overlap 1285 table to skip forward as fast as we possibly can */ 1286 Py_ssize_t i = 0; 1287 1288 end = (SRE_CHAR *)state->end; 1289 if (prefix_len > end - ptr) 1290 return 0; 1291#if SIZEOF_SRE_CHAR < 4 1292 for (i = 0; i < prefix_len; i++) 1293 if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) 1294 return 0; /* literal can't match: doesn't fit in char width */ 1295#endif 1296 while (ptr < end) { 1297 SRE_CHAR c = (SRE_CHAR) prefix[0]; 1298 while (*ptr++ != c) { 1299 if (ptr >= end) 1300 return 0; 1301 } 1302 if (ptr >= end) 1303 return 0; 1304 1305 i = 1; 1306 do { 1307 if (*ptr == (SRE_CHAR) prefix[i]) { 1308 if (++i != prefix_len) { 1309 if (++ptr >= end) 1310 return 0; 1311 continue; 1312 } 1313 /* found a potential match */ 1314 TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); 1315 state->start = ptr - (prefix_len - 1); 1316 state->ptr = ptr - (prefix_len - prefix_skip - 1); 1317 if (flags & SRE_INFO_LITERAL) 1318 return 1; /* we got all of it */ 1319 status = SRE(match)(state, pattern + 2*prefix_skip, 0); 1320 if (status != 0) 1321 return status; 1322 /* close but no cigar -- try again */ 1323 if (++ptr >= end) 1324 return 0; 1325 } 1326 i = overlap[i]; 1327 } while (i != 0); 1328 } 1329 return 0; 1330 } 1331 1332 if (charset) { 1333 /* pattern starts with a character from a known set */ 1334 end = (SRE_CHAR *)state->end; 1335 for (;;) { 1336 while (ptr < end && !SRE(charset)(state, charset, *ptr)) 1337 ptr++; 1338 if (ptr >= end) 1339 return 0; 1340 TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); 1341 state->start = ptr; 1342 state->ptr = ptr; 1343 status = SRE(match)(state, pattern, 0); 1344 if (status != 0) 1345 break; 1346 ptr++; 1347 } 1348 } else { 1349 /* general case */ 1350 assert(ptr <= end); 1351 while (1) { 1352 TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); 1353 state->start = state->ptr = ptr; 1354 status = SRE(match)(state, pattern, 0); 1355 if (status != 0 || ptr >= end) 1356 break; 1357 ptr++; 1358 } 1359 } 1360 1361 return status; 1362} 1363 1364#undef SRE_CHAR 1365#undef SIZEOF_SRE_CHAR 1366#undef SRE 1367 1368/* vim:ts=4:sw=4:et 1369*/ 1370