1/*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5 *
6 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
7 *
8 * See the _sre.c file for information on usage and redistribution.
9 */
10
11/* String matching engine */
12
13/* This file is included three times, with different character settings */
14
15LOCAL(int)
16SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
17{
18    /* check if pointer is at given position */
19
20    Py_ssize_t thisp, thatp;
21
22    switch (at) {
23
24    case SRE_AT_BEGINNING:
25    case SRE_AT_BEGINNING_STRING:
26        return ((void*) ptr == state->beginning);
27
28    case SRE_AT_BEGINNING_LINE:
29        return ((void*) ptr == state->beginning ||
30                SRE_IS_LINEBREAK((int) ptr[-1]));
31
32    case SRE_AT_END:
33        return (((SRE_CHAR *)state->end - ptr == 1 &&
34                 SRE_IS_LINEBREAK((int) ptr[0])) ||
35                ((void*) ptr == state->end));
36
37    case SRE_AT_END_LINE:
38        return ((void*) ptr == state->end ||
39                SRE_IS_LINEBREAK((int) ptr[0]));
40
41    case SRE_AT_END_STRING:
42        return ((void*) ptr == state->end);
43
44    case SRE_AT_BOUNDARY:
45        if (state->beginning == state->end)
46            return 0;
47        thatp = ((void*) ptr > state->beginning) ?
48            SRE_IS_WORD((int) ptr[-1]) : 0;
49        thisp = ((void*) ptr < state->end) ?
50            SRE_IS_WORD((int) ptr[0]) : 0;
51        return thisp != thatp;
52
53    case SRE_AT_NON_BOUNDARY:
54        if (state->beginning == state->end)
55            return 0;
56        thatp = ((void*) ptr > state->beginning) ?
57            SRE_IS_WORD((int) ptr[-1]) : 0;
58        thisp = ((void*) ptr < state->end) ?
59            SRE_IS_WORD((int) ptr[0]) : 0;
60        return thisp == thatp;
61
62    case SRE_AT_LOC_BOUNDARY:
63        if (state->beginning == state->end)
64            return 0;
65        thatp = ((void*) ptr > state->beginning) ?
66            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
67        thisp = ((void*) ptr < state->end) ?
68            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
69        return thisp != thatp;
70
71    case SRE_AT_LOC_NON_BOUNDARY:
72        if (state->beginning == state->end)
73            return 0;
74        thatp = ((void*) ptr > state->beginning) ?
75            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
76        thisp = ((void*) ptr < state->end) ?
77            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
78        return thisp == thatp;
79
80    case SRE_AT_UNI_BOUNDARY:
81        if (state->beginning == state->end)
82            return 0;
83        thatp = ((void*) ptr > state->beginning) ?
84            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
85        thisp = ((void*) ptr < state->end) ?
86            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
87        return thisp != thatp;
88
89    case SRE_AT_UNI_NON_BOUNDARY:
90        if (state->beginning == state->end)
91            return 0;
92        thatp = ((void*) ptr > state->beginning) ?
93            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
94        thisp = ((void*) ptr < state->end) ?
95            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
96        return thisp == thatp;
97
98    }
99
100    return 0;
101}
102
103LOCAL(int)
104SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
105{
106    /* check if character is a member of the given set */
107
108    int ok = 1;
109
110    for (;;) {
111        switch (*set++) {
112
113        case SRE_OP_FAILURE:
114            return !ok;
115
116        case SRE_OP_LITERAL:
117            /* <LITERAL> <code> */
118            if (ch == set[0])
119                return ok;
120            set++;
121            break;
122
123        case SRE_OP_CATEGORY:
124            /* <CATEGORY> <code> */
125            if (sre_category(set[0], (int) ch))
126                return ok;
127            set++;
128            break;
129
130        case SRE_OP_CHARSET:
131            /* <CHARSET> <bitmap> */
132            if (ch < 256 &&
133                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1)))))
134                return ok;
135            set += 256/SRE_CODE_BITS;
136            break;
137
138        case SRE_OP_RANGE:
139            /* <RANGE> <lower> <upper> */
140            if (set[0] <= ch && ch <= set[1])
141                return ok;
142            set += 2;
143            break;
144
145        case SRE_OP_RANGE_IGNORE:
146            /* <RANGE_IGNORE> <lower> <upper> */
147        {
148            SRE_CODE uch;
149            /* ch is already lower cased */
150            if (set[0] <= ch && ch <= set[1])
151                return ok;
152            uch = state->upper(ch);
153            if (set[0] <= uch && uch <= set[1])
154                return ok;
155            set += 2;
156            break;
157        }
158
159        case SRE_OP_NEGATE:
160            ok = !ok;
161            break;
162
163        case SRE_OP_BIGCHARSET:
164            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
165        {
166            Py_ssize_t count, block;
167            count = *(set++);
168
169            if (ch < 0x10000u)
170                block = ((unsigned char*)set)[ch >> 8];
171            else
172                block = -1;
173            set += 256/sizeof(SRE_CODE);
174            if (block >=0 &&
175                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] &
176                    (1u << (ch & (SRE_CODE_BITS-1)))))
177                return ok;
178            set += count * (256/SRE_CODE_BITS);
179            break;
180        }
181
182        default:
183            /* internal error -- there's not much we can do about it
184               here, so let's just pretend it didn't match... */
185            return 0;
186        }
187    }
188}
189
190LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all);
191
192LOCAL(Py_ssize_t)
193SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
194{
195    SRE_CODE chr;
196    SRE_CHAR c;
197    SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
198    SRE_CHAR* end = (SRE_CHAR *)state->end;
199    Py_ssize_t i;
200
201    /* adjust end */
202    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
203        end = ptr + maxcount;
204
205    switch (pattern[0]) {
206
207    case SRE_OP_IN:
208        /* repeated set */
209        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
210        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr))
211            ptr++;
212        break;
213
214    case SRE_OP_ANY:
215        /* repeated dot wildcard. */
216        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
217        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
218            ptr++;
219        break;
220
221    case SRE_OP_ANY_ALL:
222        /* repeated dot wildcard.  skip to the end of the target
223           string, and backtrack from there */
224        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
225        ptr = end;
226        break;
227
228    case SRE_OP_LITERAL:
229        /* repeated literal */
230        chr = pattern[1];
231        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
232        c = (SRE_CHAR) chr;
233#if SIZEOF_SRE_CHAR < 4
234        if ((SRE_CODE) c != chr)
235            ; /* literal can't match: doesn't fit in char width */
236        else
237#endif
238        while (ptr < end && *ptr == c)
239            ptr++;
240        break;
241
242    case SRE_OP_LITERAL_IGNORE:
243        /* repeated literal */
244        chr = pattern[1];
245        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
246        while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
247            ptr++;
248        break;
249
250    case SRE_OP_NOT_LITERAL:
251        /* repeated non-literal */
252        chr = pattern[1];
253        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
254        c = (SRE_CHAR) chr;
255#if SIZEOF_SRE_CHAR < 4
256        if ((SRE_CODE) c != chr)
257            ptr = end; /* literal can't match: doesn't fit in char width */
258        else
259#endif
260        while (ptr < end && *ptr != c)
261            ptr++;
262        break;
263
264    case SRE_OP_NOT_LITERAL_IGNORE:
265        /* repeated non-literal */
266        chr = pattern[1];
267        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
268        while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
269            ptr++;
270        break;
271
272    default:
273        /* repeated single character pattern */
274        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
275        while ((SRE_CHAR*) state->ptr < end) {
276            i = SRE(match)(state, pattern, 0);
277            if (i < 0)
278                return i;
279            if (!i)
280                break;
281        }
282        TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
283               (SRE_CHAR*) state->ptr - ptr));
284        return (SRE_CHAR*) state->ptr - ptr;
285    }
286
287    TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
288           ptr - (SRE_CHAR*) state->ptr));
289    return ptr - (SRE_CHAR*) state->ptr;
290}
291
292#if 0 /* not used in this release */
293LOCAL(int)
294SRE(info)(SRE_STATE* state, SRE_CODE* pattern)
295{
296    /* check if an SRE_OP_INFO block matches at the current position.
297       returns the number of SRE_CODE objects to skip if successful, 0
298       if no match */
299
300    SRE_CHAR* end = (SRE_CHAR*) state->end;
301    SRE_CHAR* ptr = (SRE_CHAR*) state->ptr;
302    Py_ssize_t i;
303
304    /* check minimal length */
305    if (pattern[3] && end - ptr < pattern[3])
306        return 0;
307
308    /* check known prefix */
309    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
310        /* <length> <skip> <prefix data> <overlap data> */
311        for (i = 0; i < pattern[5]; i++)
312            if ((SRE_CODE) ptr[i] != pattern[7 + i])
313                return 0;
314        return pattern[0] + 2 * pattern[6];
315    }
316    return pattern[0];
317}
318#endif
319
320/* The macros below should be used to protect recursive SRE(match)()
321 * calls that *failed* and do *not* return immediately (IOW, those
322 * that will backtrack). Explaining:
323 *
324 * - Recursive SRE(match)() returned true: that's usually a success
325 *   (besides atypical cases like ASSERT_NOT), therefore there's no
326 *   reason to restore lastmark;
327 *
328 * - Recursive SRE(match)() returned false but the current SRE(match)()
329 *   is returning to the caller: If the current SRE(match)() is the
330 *   top function of the recursion, returning false will be a matching
331 *   failure, and it doesn't matter where lastmark is pointing to.
332 *   If it's *not* the top function, it will be a recursive SRE(match)()
333 *   failure by itself, and the calling SRE(match)() will have to deal
334 *   with the failure by the same rules explained here (it will restore
335 *   lastmark by itself if necessary);
336 *
337 * - Recursive SRE(match)() returned false, and will continue the
338 *   outside 'for' loop: must be protected when breaking, since the next
339 *   OP could potentially depend on lastmark;
340 *
341 * - Recursive SRE(match)() returned false, and will be called again
342 *   inside a local for/while loop: must be protected between each
343 *   loop iteration, since the recursive SRE(match)() could do anything,
344 *   and could potentially depend on lastmark.
345 *
346 * For more information, check the discussion at SF patch #712900.
347 */
348#define LASTMARK_SAVE()     \
349    do { \
350        ctx->lastmark = state->lastmark; \
351        ctx->lastindex = state->lastindex; \
352    } while (0)
353#define LASTMARK_RESTORE()  \
354    do { \
355        state->lastmark = ctx->lastmark; \
356        state->lastindex = ctx->lastindex; \
357    } while (0)
358
359#define RETURN_ERROR(i) do { return i; } while(0)
360#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
361#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
362
363#define RETURN_ON_ERROR(i) \
364    do { if (i < 0) RETURN_ERROR(i); } while (0)
365#define RETURN_ON_SUCCESS(i) \
366    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
367#define RETURN_ON_FAILURE(i) \
368    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
369
370#define DATA_STACK_ALLOC(state, type, ptr) \
371do { \
372    alloc_pos = state->data_stack_base; \
373    TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
374           "(%" PY_FORMAT_SIZE_T "d)\n", \
375           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \
376    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
377        int j = data_stack_grow(state, sizeof(type)); \
378        if (j < 0) return j; \
379        if (ctx_pos != -1) \
380            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
381    } \
382    ptr = (type*)(state->data_stack+alloc_pos); \
383    state->data_stack_base += sizeof(type); \
384} while (0)
385
386#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
387do { \
388    TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", Py_STRINGIFY(type), pos)); \
389    ptr = (type*)(state->data_stack+pos); \
390} while (0)
391
392#define DATA_STACK_PUSH(state, data, size) \
393do { \
394    TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
395           "(%" PY_FORMAT_SIZE_T "d)\n", \
396           data, state->data_stack_base, size)); \
397    if (size > state->data_stack_size - state->data_stack_base) { \
398        int j = data_stack_grow(state, size); \
399        if (j < 0) return j; \
400        if (ctx_pos != -1) \
401            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \
402    } \
403    memcpy(state->data_stack+state->data_stack_base, data, size); \
404    state->data_stack_base += size; \
405} while (0)
406
407#define DATA_STACK_POP(state, data, size, discard) \
408do { \
409    TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
410           "(%" PY_FORMAT_SIZE_T "d)\n", \
411           data, state->data_stack_base-size, size)); \
412    memcpy(data, state->data_stack+state->data_stack_base-size, size); \
413    if (discard) \
414        state->data_stack_base -= size; \
415} while (0)
416
417#define DATA_STACK_POP_DISCARD(state, size) \
418do { \
419    TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
420           "(%" PY_FORMAT_SIZE_T "d)\n", \
421           state->data_stack_base-size, size)); \
422    state->data_stack_base -= size; \
423} while(0)
424
425#define DATA_PUSH(x) \
426    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
427#define DATA_POP(x) \
428    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
429#define DATA_POP_DISCARD(x) \
430    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
431#define DATA_ALLOC(t,p) \
432    DATA_STACK_ALLOC(state, t, p)
433#define DATA_LOOKUP_AT(t,p,pos) \
434    DATA_STACK_LOOKUP_AT(state,t,p,pos)
435
436#define MARK_PUSH(lastmark) \
437    do if (lastmark > 0) { \
438        i = lastmark; /* ctx->lastmark may change if reallocated */ \
439        DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
440    } while (0)
441#define MARK_POP(lastmark) \
442    do if (lastmark > 0) { \
443        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
444    } while (0)
445#define MARK_POP_KEEP(lastmark) \
446    do if (lastmark > 0) { \
447        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
448    } while (0)
449#define MARK_POP_DISCARD(lastmark) \
450    do if (lastmark > 0) { \
451        DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
452    } while (0)
453
454#define JUMP_NONE            0
455#define JUMP_MAX_UNTIL_1     1
456#define JUMP_MAX_UNTIL_2     2
457#define JUMP_MAX_UNTIL_3     3
458#define JUMP_MIN_UNTIL_1     4
459#define JUMP_MIN_UNTIL_2     5
460#define JUMP_MIN_UNTIL_3     6
461#define JUMP_REPEAT          7
462#define JUMP_REPEAT_ONE_1    8
463#define JUMP_REPEAT_ONE_2    9
464#define JUMP_MIN_REPEAT_ONE  10
465#define JUMP_BRANCH          11
466#define JUMP_ASSERT          12
467#define JUMP_ASSERT_NOT      13
468
469#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, matchall) \
470    DATA_ALLOC(SRE(match_context), nextctx); \
471    nextctx->last_ctx_pos = ctx_pos; \
472    nextctx->jump = jumpvalue; \
473    nextctx->pattern = nextpattern; \
474    nextctx->match_all = matchall; \
475    ctx_pos = alloc_pos; \
476    ctx = nextctx; \
477    goto entrance; \
478    jumplabel: \
479    while (0) /* gcc doesn't like labels at end of scopes */ \
480
481#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
482    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->match_all)
483
484#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \
485    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)
486
487typedef struct {
488    Py_ssize_t last_ctx_pos;
489    Py_ssize_t jump;
490    SRE_CHAR* ptr;
491    SRE_CODE* pattern;
492    Py_ssize_t count;
493    Py_ssize_t lastmark;
494    Py_ssize_t lastindex;
495    union {
496        SRE_CODE chr;
497        SRE_REPEAT* rep;
498    } u;
499    int match_all;
500} SRE(match_context);
501
502/* check if string matches the given pattern.  returns <0 for
503   error, 0 for failure, and 1 for success */
504LOCAL(Py_ssize_t)
505SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all)
506{
507    SRE_CHAR* end = (SRE_CHAR *)state->end;
508    Py_ssize_t alloc_pos, ctx_pos = -1;
509    Py_ssize_t i, ret = 0;
510    Py_ssize_t jump;
511    unsigned int sigcount=0;
512
513    SRE(match_context)* ctx;
514    SRE(match_context)* nextctx;
515
516    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
517
518    DATA_ALLOC(SRE(match_context), ctx);
519    ctx->last_ctx_pos = -1;
520    ctx->jump = JUMP_NONE;
521    ctx->pattern = pattern;
522    ctx->match_all = match_all;
523    ctx_pos = alloc_pos;
524
525entrance:
526
527    ctx->ptr = (SRE_CHAR *)state->ptr;
528
529    if (ctx->pattern[0] == SRE_OP_INFO) {
530        /* optimization info block */
531        /* <INFO> <1=skip> <2=flags> <3=min> ... */
532        if (ctx->pattern[3] && (uintptr_t)(end - ctx->ptr) < ctx->pattern[3]) {
533            TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
534                   "need %" PY_FORMAT_SIZE_T "d)\n",
535                   end - ctx->ptr, (Py_ssize_t) ctx->pattern[3]));
536            RETURN_FAILURE;
537        }
538        ctx->pattern += ctx->pattern[1] + 1;
539    }
540
541    for (;;) {
542        ++sigcount;
543        if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
544            RETURN_ERROR(SRE_ERROR_INTERRUPTED);
545
546        switch (*ctx->pattern++) {
547
548        case SRE_OP_MARK:
549            /* set mark */
550            /* <MARK> <gid> */
551            TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
552                   ctx->ptr, ctx->pattern[0]));
553            i = ctx->pattern[0];
554            if (i & 1)
555                state->lastindex = i/2 + 1;
556            if (i > state->lastmark) {
557                /* state->lastmark is the highest valid index in the
558                   state->mark array.  If it is increased by more than 1,
559                   the intervening marks must be set to NULL to signal
560                   that these marks have not been encountered. */
561                Py_ssize_t j = state->lastmark + 1;
562                while (j < i)
563                    state->mark[j++] = NULL;
564                state->lastmark = i;
565            }
566            state->mark[i] = ctx->ptr;
567            ctx->pattern++;
568            break;
569
570        case SRE_OP_LITERAL:
571            /* match literal string */
572            /* <LITERAL> <code> */
573            TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
574                   ctx->ptr, *ctx->pattern));
575            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
576                RETURN_FAILURE;
577            ctx->pattern++;
578            ctx->ptr++;
579            break;
580
581        case SRE_OP_NOT_LITERAL:
582            /* match anything that is not literal character */
583            /* <NOT_LITERAL> <code> */
584            TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
585                   ctx->ptr, *ctx->pattern));
586            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
587                RETURN_FAILURE;
588            ctx->pattern++;
589            ctx->ptr++;
590            break;
591
592        case SRE_OP_SUCCESS:
593            /* end of pattern */
594            TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
595            if (!ctx->match_all || ctx->ptr == state->end) {
596                state->ptr = ctx->ptr;
597                RETURN_SUCCESS;
598            }
599            RETURN_FAILURE;
600
601        case SRE_OP_AT:
602            /* match at given position */
603            /* <AT> <code> */
604            TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
605            if (!SRE(at)(state, ctx->ptr, *ctx->pattern))
606                RETURN_FAILURE;
607            ctx->pattern++;
608            break;
609
610        case SRE_OP_CATEGORY:
611            /* match at given category */
612            /* <CATEGORY> <code> */
613            TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
614                   ctx->ptr, *ctx->pattern));
615            if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
616                RETURN_FAILURE;
617            ctx->pattern++;
618            ctx->ptr++;
619            break;
620
621        case SRE_OP_ANY:
622            /* match anything (except a newline) */
623            /* <ANY> */
624            TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
625            if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
626                RETURN_FAILURE;
627            ctx->ptr++;
628            break;
629
630        case SRE_OP_ANY_ALL:
631            /* match anything */
632            /* <ANY_ALL> */
633            TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
634            if (ctx->ptr >= end)
635                RETURN_FAILURE;
636            ctx->ptr++;
637            break;
638
639        case SRE_OP_IN:
640            /* match set member (or non_member) */
641            /* <IN> <skip> <set> */
642            TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
643            if (ctx->ptr >= end ||
644                !SRE(charset)(state, ctx->pattern + 1, *ctx->ptr))
645                RETURN_FAILURE;
646            ctx->pattern += ctx->pattern[0];
647            ctx->ptr++;
648            break;
649
650        case SRE_OP_LITERAL_IGNORE:
651            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
652                   ctx->pattern, ctx->ptr, ctx->pattern[0]));
653            if (ctx->ptr >= end ||
654                state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
655                RETURN_FAILURE;
656            ctx->pattern++;
657            ctx->ptr++;
658            break;
659
660        case SRE_OP_NOT_LITERAL_IGNORE:
661            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
662                   ctx->pattern, ctx->ptr, *ctx->pattern));
663            if (ctx->ptr >= end ||
664                state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
665                RETURN_FAILURE;
666            ctx->pattern++;
667            ctx->ptr++;
668            break;
669
670        case SRE_OP_IN_IGNORE:
671            TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
672            if (ctx->ptr >= end
673                || !SRE(charset)(state, ctx->pattern+1,
674                                 (SRE_CODE)state->lower(*ctx->ptr)))
675                RETURN_FAILURE;
676            ctx->pattern += ctx->pattern[0];
677            ctx->ptr++;
678            break;
679
680        case SRE_OP_JUMP:
681        case SRE_OP_INFO:
682            /* jump forward */
683            /* <JUMP> <offset> */
684            TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
685                   ctx->ptr, ctx->pattern[0]));
686            ctx->pattern += ctx->pattern[0];
687            break;
688
689        case SRE_OP_BRANCH:
690            /* alternation */
691            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
692            TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
693            LASTMARK_SAVE();
694            ctx->u.rep = state->repeat;
695            if (ctx->u.rep)
696                MARK_PUSH(ctx->lastmark);
697            for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
698                if (ctx->pattern[1] == SRE_OP_LITERAL &&
699                    (ctx->ptr >= end ||
700                     (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
701                    continue;
702                if (ctx->pattern[1] == SRE_OP_IN &&
703                    (ctx->ptr >= end ||
704                     !SRE(charset)(state, ctx->pattern + 3,
705                                   (SRE_CODE) *ctx->ptr)))
706                    continue;
707                state->ptr = ctx->ptr;
708                DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
709                if (ret) {
710                    if (ctx->u.rep)
711                        MARK_POP_DISCARD(ctx->lastmark);
712                    RETURN_ON_ERROR(ret);
713                    RETURN_SUCCESS;
714                }
715                if (ctx->u.rep)
716                    MARK_POP_KEEP(ctx->lastmark);
717                LASTMARK_RESTORE();
718            }
719            if (ctx->u.rep)
720                MARK_POP_DISCARD(ctx->lastmark);
721            RETURN_FAILURE;
722
723        case SRE_OP_REPEAT_ONE:
724            /* match repeated sequence (maximizing regexp) */
725
726            /* this operator only works if the repeated item is
727               exactly one character wide, and we're not already
728               collecting backtracking points.  for other cases,
729               use the MAX_REPEAT operator */
730
731            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
732
733            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
734                   ctx->pattern[1], ctx->pattern[2]));
735
736            if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
737                RETURN_FAILURE; /* cannot match */
738
739            state->ptr = ctx->ptr;
740
741            ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[2]);
742            RETURN_ON_ERROR(ret);
743            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
744            ctx->count = ret;
745            ctx->ptr += ctx->count;
746
747            /* when we arrive here, count contains the number of
748               matches, and ctx->ptr points to the tail of the target
749               string.  check if the rest of the pattern matches,
750               and backtrack if not. */
751
752            if (ctx->count < (Py_ssize_t) ctx->pattern[1])
753                RETURN_FAILURE;
754
755            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
756                ctx->ptr == state->end) {
757                /* tail is empty.  we're finished */
758                state->ptr = ctx->ptr;
759                RETURN_SUCCESS;
760            }
761
762            LASTMARK_SAVE();
763
764            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
765                /* tail starts with a literal. skip positions where
766                   the rest of the pattern cannot possibly match */
767                ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
768                for (;;) {
769                    while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
770                           (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
771                        ctx->ptr--;
772                        ctx->count--;
773                    }
774                    if (ctx->count < (Py_ssize_t) ctx->pattern[1])
775                        break;
776                    state->ptr = ctx->ptr;
777                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
778                            ctx->pattern+ctx->pattern[0]);
779                    if (ret) {
780                        RETURN_ON_ERROR(ret);
781                        RETURN_SUCCESS;
782                    }
783
784                    LASTMARK_RESTORE();
785
786                    ctx->ptr--;
787                    ctx->count--;
788                }
789
790            } else {
791                /* general case */
792                while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
793                    state->ptr = ctx->ptr;
794                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
795                            ctx->pattern+ctx->pattern[0]);
796                    if (ret) {
797                        RETURN_ON_ERROR(ret);
798                        RETURN_SUCCESS;
799                    }
800                    ctx->ptr--;
801                    ctx->count--;
802                    LASTMARK_RESTORE();
803                }
804            }
805            RETURN_FAILURE;
806
807        case SRE_OP_MIN_REPEAT_ONE:
808            /* match repeated sequence (minimizing regexp) */
809
810            /* this operator only works if the repeated item is
811               exactly one character wide, and we're not already
812               collecting backtracking points.  for other cases,
813               use the MIN_REPEAT operator */
814
815            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
816
817            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
818                   ctx->pattern[1], ctx->pattern[2]));
819
820            if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
821                RETURN_FAILURE; /* cannot match */
822
823            state->ptr = ctx->ptr;
824
825            if (ctx->pattern[1] == 0)
826                ctx->count = 0;
827            else {
828                /* count using pattern min as the maximum */
829                ret = SRE(count)(state, ctx->pattern+3, ctx->pattern[1]);
830                RETURN_ON_ERROR(ret);
831                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
832                if (ret < (Py_ssize_t) ctx->pattern[1])
833                    /* didn't match minimum number of times */
834                    RETURN_FAILURE;
835                /* advance past minimum matches of repeat */
836                ctx->count = ret;
837                ctx->ptr += ctx->count;
838            }
839
840            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS &&
841                (!match_all || ctx->ptr == state->end)) {
842                /* tail is empty.  we're finished */
843                state->ptr = ctx->ptr;
844                RETURN_SUCCESS;
845
846            } else {
847                /* general case */
848                LASTMARK_SAVE();
849                while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
850                       || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
851                    state->ptr = ctx->ptr;
852                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
853                            ctx->pattern+ctx->pattern[0]);
854                    if (ret) {
855                        RETURN_ON_ERROR(ret);
856                        RETURN_SUCCESS;
857                    }
858                    state->ptr = ctx->ptr;
859                    ret = SRE(count)(state, ctx->pattern+3, 1);
860                    RETURN_ON_ERROR(ret);
861                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
862                    if (ret == 0)
863                        break;
864                    assert(ret == 1);
865                    ctx->ptr++;
866                    ctx->count++;
867                    LASTMARK_RESTORE();
868                }
869            }
870            RETURN_FAILURE;
871
872        case SRE_OP_REPEAT:
873            /* create repeat context.  all the hard work is done
874               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
875            /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
876            TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
877                   ctx->pattern[1], ctx->pattern[2]));
878
879            /* install new repeat context */
880            ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
881            if (!ctx->u.rep) {
882                PyErr_NoMemory();
883                RETURN_FAILURE;
884            }
885            ctx->u.rep->count = -1;
886            ctx->u.rep->pattern = ctx->pattern;
887            ctx->u.rep->prev = state->repeat;
888            ctx->u.rep->last_ptr = NULL;
889            state->repeat = ctx->u.rep;
890
891            state->ptr = ctx->ptr;
892            DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
893            state->repeat = ctx->u.rep->prev;
894            PyObject_FREE(ctx->u.rep);
895
896            if (ret) {
897                RETURN_ON_ERROR(ret);
898                RETURN_SUCCESS;
899            }
900            RETURN_FAILURE;
901
902        case SRE_OP_MAX_UNTIL:
903            /* maximizing repeat */
904            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
905
906            /* FIXME: we probably need to deal with zero-width
907               matches in here... */
908
909            ctx->u.rep = state->repeat;
910            if (!ctx->u.rep)
911                RETURN_ERROR(SRE_ERROR_STATE);
912
913            state->ptr = ctx->ptr;
914
915            ctx->count = ctx->u.rep->count+1;
916
917            TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
918                   ctx->ptr, ctx->count));
919
920            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
921                /* not enough matches */
922                ctx->u.rep->count = ctx->count;
923                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
924                        ctx->u.rep->pattern+3);
925                if (ret) {
926                    RETURN_ON_ERROR(ret);
927                    RETURN_SUCCESS;
928                }
929                ctx->u.rep->count = ctx->count-1;
930                state->ptr = ctx->ptr;
931                RETURN_FAILURE;
932            }
933
934            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
935                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
936                state->ptr != ctx->u.rep->last_ptr) {
937                /* we may have enough matches, but if we can
938                   match another item, do so */
939                ctx->u.rep->count = ctx->count;
940                LASTMARK_SAVE();
941                MARK_PUSH(ctx->lastmark);
942                /* zero-width match protection */
943                DATA_PUSH(&ctx->u.rep->last_ptr);
944                ctx->u.rep->last_ptr = state->ptr;
945                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
946                        ctx->u.rep->pattern+3);
947                DATA_POP(&ctx->u.rep->last_ptr);
948                if (ret) {
949                    MARK_POP_DISCARD(ctx->lastmark);
950                    RETURN_ON_ERROR(ret);
951                    RETURN_SUCCESS;
952                }
953                MARK_POP(ctx->lastmark);
954                LASTMARK_RESTORE();
955                ctx->u.rep->count = ctx->count-1;
956                state->ptr = ctx->ptr;
957            }
958
959            /* cannot match more repeated items here.  make sure the
960               tail matches */
961            state->repeat = ctx->u.rep->prev;
962            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
963            RETURN_ON_SUCCESS(ret);
964            state->repeat = ctx->u.rep;
965            state->ptr = ctx->ptr;
966            RETURN_FAILURE;
967
968        case SRE_OP_MIN_UNTIL:
969            /* minimizing repeat */
970            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
971
972            ctx->u.rep = state->repeat;
973            if (!ctx->u.rep)
974                RETURN_ERROR(SRE_ERROR_STATE);
975
976            state->ptr = ctx->ptr;
977
978            ctx->count = ctx->u.rep->count+1;
979
980            TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
981                   ctx->ptr, ctx->count, ctx->u.rep->pattern));
982
983            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
984                /* not enough matches */
985                ctx->u.rep->count = ctx->count;
986                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
987                        ctx->u.rep->pattern+3);
988                if (ret) {
989                    RETURN_ON_ERROR(ret);
990                    RETURN_SUCCESS;
991                }
992                ctx->u.rep->count = ctx->count-1;
993                state->ptr = ctx->ptr;
994                RETURN_FAILURE;
995            }
996
997            LASTMARK_SAVE();
998
999            /* see if the tail matches */
1000            state->repeat = ctx->u.rep->prev;
1001            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1002            if (ret) {
1003                RETURN_ON_ERROR(ret);
1004                RETURN_SUCCESS;
1005            }
1006
1007            state->repeat = ctx->u.rep;
1008            state->ptr = ctx->ptr;
1009
1010            LASTMARK_RESTORE();
1011
1012            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1013                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1014                state->ptr == ctx->u.rep->last_ptr)
1015                RETURN_FAILURE;
1016
1017            ctx->u.rep->count = ctx->count;
1018            /* zero-width match protection */
1019            DATA_PUSH(&ctx->u.rep->last_ptr);
1020            ctx->u.rep->last_ptr = state->ptr;
1021            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1022                    ctx->u.rep->pattern+3);
1023            DATA_POP(&ctx->u.rep->last_ptr);
1024            if (ret) {
1025                RETURN_ON_ERROR(ret);
1026                RETURN_SUCCESS;
1027            }
1028            ctx->u.rep->count = ctx->count-1;
1029            state->ptr = ctx->ptr;
1030            RETURN_FAILURE;
1031
1032        case SRE_OP_GROUPREF:
1033            /* match backreference */
1034            TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1035                   ctx->ptr, ctx->pattern[0]));
1036            i = ctx->pattern[0];
1037            {
1038                Py_ssize_t groupref = i+i;
1039                if (groupref >= state->lastmark) {
1040                    RETURN_FAILURE;
1041                } else {
1042                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1043                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1044                    if (!p || !e || e < p)
1045                        RETURN_FAILURE;
1046                    while (p < e) {
1047                        if (ctx->ptr >= end || *ctx->ptr != *p)
1048                            RETURN_FAILURE;
1049                        p++;
1050                        ctx->ptr++;
1051                    }
1052                }
1053            }
1054            ctx->pattern++;
1055            break;
1056
1057        case SRE_OP_GROUPREF_IGNORE:
1058            /* match backreference */
1059            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1060                   ctx->ptr, ctx->pattern[0]));
1061            i = ctx->pattern[0];
1062            {
1063                Py_ssize_t groupref = i+i;
1064                if (groupref >= state->lastmark) {
1065                    RETURN_FAILURE;
1066                } else {
1067                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1068                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1069                    if (!p || !e || e < p)
1070                        RETURN_FAILURE;
1071                    while (p < e) {
1072                        if (ctx->ptr >= end ||
1073                            state->lower(*ctx->ptr) != state->lower(*p))
1074                            RETURN_FAILURE;
1075                        p++;
1076                        ctx->ptr++;
1077                    }
1078                }
1079            }
1080            ctx->pattern++;
1081            break;
1082
1083        case SRE_OP_GROUPREF_EXISTS:
1084            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1085                   ctx->ptr, ctx->pattern[0]));
1086            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1087            i = ctx->pattern[0];
1088            {
1089                Py_ssize_t groupref = i+i;
1090                if (groupref >= state->lastmark) {
1091                    ctx->pattern += ctx->pattern[1];
1092                    break;
1093                } else {
1094                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1095                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1096                    if (!p || !e || e < p) {
1097                        ctx->pattern += ctx->pattern[1];
1098                        break;
1099                    }
1100                }
1101            }
1102            ctx->pattern += 2;
1103            break;
1104
1105        case SRE_OP_ASSERT:
1106            /* assert subpattern */
1107            /* <ASSERT> <skip> <back> <pattern> */
1108            TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1109                   ctx->ptr, ctx->pattern[1]));
1110            if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
1111                RETURN_FAILURE;
1112            state->ptr = ctx->ptr - ctx->pattern[1];
1113            DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
1114            RETURN_ON_FAILURE(ret);
1115            ctx->pattern += ctx->pattern[0];
1116            break;
1117
1118        case SRE_OP_ASSERT_NOT:
1119            /* assert not subpattern */
1120            /* <ASSERT_NOT> <skip> <back> <pattern> */
1121            TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1122                   ctx->ptr, ctx->pattern[1]));
1123            if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
1124                state->ptr = ctx->ptr - ctx->pattern[1];
1125                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
1126                if (ret) {
1127                    RETURN_ON_ERROR(ret);
1128                    RETURN_FAILURE;
1129                }
1130            }
1131            ctx->pattern += ctx->pattern[0];
1132            break;
1133
1134        case SRE_OP_FAILURE:
1135            /* immediate failure */
1136            TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1137            RETURN_FAILURE;
1138
1139        default:
1140            TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1141                   ctx->pattern[-1]));
1142            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1143        }
1144    }
1145
1146exit:
1147    ctx_pos = ctx->last_ctx_pos;
1148    jump = ctx->jump;
1149    DATA_POP_DISCARD(ctx);
1150    if (ctx_pos == -1)
1151        return ret;
1152    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos);
1153
1154    switch (jump) {
1155        case JUMP_MAX_UNTIL_2:
1156            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1157            goto jump_max_until_2;
1158        case JUMP_MAX_UNTIL_3:
1159            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1160            goto jump_max_until_3;
1161        case JUMP_MIN_UNTIL_2:
1162            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1163            goto jump_min_until_2;
1164        case JUMP_MIN_UNTIL_3:
1165            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1166            goto jump_min_until_3;
1167        case JUMP_BRANCH:
1168            TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1169            goto jump_branch;
1170        case JUMP_MAX_UNTIL_1:
1171            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1172            goto jump_max_until_1;
1173        case JUMP_MIN_UNTIL_1:
1174            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1175            goto jump_min_until_1;
1176        case JUMP_REPEAT:
1177            TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1178            goto jump_repeat;
1179        case JUMP_REPEAT_ONE_1:
1180            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1181            goto jump_repeat_one_1;
1182        case JUMP_REPEAT_ONE_2:
1183            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1184            goto jump_repeat_one_2;
1185        case JUMP_MIN_REPEAT_ONE:
1186            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1187            goto jump_min_repeat_one;
1188        case JUMP_ASSERT:
1189            TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1190            goto jump_assert;
1191        case JUMP_ASSERT_NOT:
1192            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1193            goto jump_assert_not;
1194        case JUMP_NONE:
1195            TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1196                   ctx->ptr, ret));
1197            break;
1198    }
1199
1200    return ret; /* should never get here */
1201}
1202
1203LOCAL(Py_ssize_t)
1204SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
1205{
1206    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1207    SRE_CHAR* end = (SRE_CHAR *)state->end;
1208    Py_ssize_t status = 0;
1209    Py_ssize_t prefix_len = 0;
1210    Py_ssize_t prefix_skip = 0;
1211    SRE_CODE* prefix = NULL;
1212    SRE_CODE* charset = NULL;
1213    SRE_CODE* overlap = NULL;
1214    int flags = 0;
1215
1216    if (ptr > end)
1217        return 0;
1218
1219    if (pattern[0] == SRE_OP_INFO) {
1220        /* optimization info block */
1221        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1222
1223        flags = pattern[2];
1224
1225        if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
1226            TRACE(("reject (got %u chars, need %u)\n",
1227                   (unsigned int)(end - ptr), pattern[3]));
1228            return 0;
1229        }
1230        if (pattern[3] > 1) {
1231            /* adjust end point (but make sure we leave at least one
1232               character in there, so literal search will work) */
1233            end -= pattern[3] - 1;
1234            if (end <= ptr)
1235                end = ptr;
1236        }
1237
1238        if (flags & SRE_INFO_PREFIX) {
1239            /* pattern starts with a known prefix */
1240            /* <length> <skip> <prefix data> <overlap data> */
1241            prefix_len = pattern[5];
1242            prefix_skip = pattern[6];
1243            prefix = pattern + 7;
1244            overlap = prefix + prefix_len - 1;
1245        } else if (flags & SRE_INFO_CHARSET)
1246            /* pattern starts with a character from a known set */
1247            /* <charset> */
1248            charset = pattern + 5;
1249
1250        pattern += 1 + pattern[1];
1251    }
1252
1253    TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
1254           prefix, prefix_len, prefix_skip));
1255    TRACE(("charset = %p\n", charset));
1256
1257    if (prefix_len == 1) {
1258        /* pattern starts with a literal character */
1259        SRE_CHAR c = (SRE_CHAR) prefix[0];
1260#if SIZEOF_SRE_CHAR < 4
1261        if ((SRE_CODE) c != prefix[0])
1262            return 0; /* literal can't match: doesn't fit in char width */
1263#endif
1264        end = (SRE_CHAR *)state->end;
1265        while (ptr < end) {
1266            while (*ptr != c) {
1267                if (++ptr >= end)
1268                    return 0;
1269            }
1270            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1271            state->start = ptr;
1272            state->ptr = ptr + prefix_skip;
1273            if (flags & SRE_INFO_LITERAL)
1274                return 1; /* we got all of it */
1275            status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1276            if (status != 0)
1277                return status;
1278            ++ptr;
1279        }
1280        return 0;
1281    }
1282
1283    if (prefix_len > 1) {
1284        /* pattern starts with a known prefix.  use the overlap
1285           table to skip forward as fast as we possibly can */
1286        Py_ssize_t i = 0;
1287
1288        end = (SRE_CHAR *)state->end;
1289        if (prefix_len > end - ptr)
1290            return 0;
1291#if SIZEOF_SRE_CHAR < 4
1292        for (i = 0; i < prefix_len; i++)
1293            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i])
1294                return 0; /* literal can't match: doesn't fit in char width */
1295#endif
1296        while (ptr < end) {
1297            SRE_CHAR c = (SRE_CHAR) prefix[0];
1298            while (*ptr++ != c) {
1299                if (ptr >= end)
1300                    return 0;
1301            }
1302            if (ptr >= end)
1303                return 0;
1304
1305            i = 1;
1306            do {
1307                if (*ptr == (SRE_CHAR) prefix[i]) {
1308                    if (++i != prefix_len) {
1309                        if (++ptr >= end)
1310                            return 0;
1311                        continue;
1312                    }
1313                    /* found a potential match */
1314                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1315                    state->start = ptr - (prefix_len - 1);
1316                    state->ptr = ptr - (prefix_len - prefix_skip - 1);
1317                    if (flags & SRE_INFO_LITERAL)
1318                        return 1; /* we got all of it */
1319                    status = SRE(match)(state, pattern + 2*prefix_skip, 0);
1320                    if (status != 0)
1321                        return status;
1322                    /* close but no cigar -- try again */
1323                    if (++ptr >= end)
1324                        return 0;
1325                }
1326                i = overlap[i];
1327            } while (i != 0);
1328        }
1329        return 0;
1330    }
1331
1332    if (charset) {
1333        /* pattern starts with a character from a known set */
1334        end = (SRE_CHAR *)state->end;
1335        for (;;) {
1336            while (ptr < end && !SRE(charset)(state, charset, *ptr))
1337                ptr++;
1338            if (ptr >= end)
1339                return 0;
1340            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1341            state->start = ptr;
1342            state->ptr = ptr;
1343            status = SRE(match)(state, pattern, 0);
1344            if (status != 0)
1345                break;
1346            ptr++;
1347        }
1348    } else {
1349        /* general case */
1350        assert(ptr <= end);
1351        while (1) {
1352            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1353            state->start = state->ptr = ptr;
1354            status = SRE(match)(state, pattern, 0);
1355            if (status != 0 || ptr >= end)
1356                break;
1357            ptr++;
1358        }
1359    }
1360
1361    return status;
1362}
1363
1364#undef SRE_CHAR
1365#undef SIZEOF_SRE_CHAR
1366#undef SRE
1367
1368/* vim:ts=4:sw=4:et
1369*/
1370