1/*
2 * Secret Labs' Regular Expression Engine
3 *
4 * regular expression matching engine
5
6  Copyright (c) 2015, Daryl McDaniel. All rights reserved.<BR>
7  Copyright (c) 2011, Intel Corporation. All rights reserved.<BR>
8  This program and the accompanying materials are licensed and made available under
9  the terms and conditions of the BSD License that accompanies this distribution.
10  The full text of the license may be found at
11  http://opensource.org/licenses/bsd-license.
12
13  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
14  WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
15 *
16 * partial history:
17 * 1999-10-24 fl  created (based on existing template matcher code)
18 * 2000-03-06 fl  first alpha, sort of
19 * 2000-08-01 fl  fixes for 1.6b1
20 * 2000-08-07 fl  use PyOS_CheckStack() if available
21 * 2000-09-20 fl  added expand method
22 * 2001-03-20 fl  lots of fixes for 2.1b2
23 * 2001-04-15 fl  export copyright as Python attribute, not global
24 * 2001-04-28 fl  added __copy__ methods (work in progress)
25 * 2001-05-14 fl  fixes for 1.5.2 compatibility
26 * 2001-07-01 fl  added BIGCHARSET support (from Martin von Loewis)
27 * 2001-10-18 fl  fixed group reset issue (from Matthew Mueller)
28 * 2001-10-20 fl  added split primitive; reenable unicode for 1.6/2.0/2.1
29 * 2001-10-21 fl  added sub/subn primitive
30 * 2001-10-24 fl  added finditer primitive (for 2.2 only)
31 * 2001-12-07 fl  fixed memory leak in sub/subn (Guido van Rossum)
32 * 2002-11-09 fl  fixed empty sub/subn return type
33 * 2003-04-18 mvl fully support 4-byte codes
34 * 2003-10-17 gn  implemented non recursive scheme
35 *
36 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
37 *
38 * This version of the SRE library can be redistributed under CNRI's
39 * Python 1.6 license.  For any other use, please contact Secret Labs
40 * AB (info@pythonware.com).
41 *
42 * Portions of this engine have been developed in cooperation with
43 * CNRI.  Hewlett-Packard provided funding for 1.6 integration and
44 * other compatibility work.
45 */
46
47/* Get rid of these macros to prevent collisions between EFI and Python in this file. */
48#undef  RETURN_ERROR
49#undef  RETURN_SUCCESS
50
51#ifndef SRE_RECURSIVE
52
53static char copyright[] =
54    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB ";
55
56#define PY_SSIZE_T_CLEAN
57
58#include "Python.h"
59#include "structmember.h" /* offsetof */
60
61#include "sre.h"
62
63#include <ctype.h>
64
65/* name of this module, minus the leading underscore */
66#if !defined(SRE_MODULE)
67#define SRE_MODULE "sre"
68#endif
69
70#define SRE_PY_MODULE "re"
71
72/* defining this one enables tracing */
73#undef VERBOSE
74
75#if PY_VERSION_HEX >= 0x01060000
76#if PY_VERSION_HEX  < 0x02020000 || defined(Py_USING_UNICODE)
77/* defining this enables unicode support (default under 1.6a1 and later) */
78#define HAVE_UNICODE
79#endif
80#endif
81
82/* -------------------------------------------------------------------- */
83/* optional features */
84
85/* enables fast searching */
86#define USE_FAST_SEARCH
87
88/* enables aggressive inlining (always on for Visual C) */
89#undef USE_INLINE
90
91/* enables copy/deepcopy handling (work in progress) */
92#undef USE_BUILTIN_COPY
93
94#if PY_VERSION_HEX < 0x01060000
95#define PyObject_DEL(op) PyMem_DEL((op))
96#endif
97
98/* -------------------------------------------------------------------- */
99
100#if defined(_MSC_VER)
101#pragma optimize("gt", on) /* doesn't seem to make much difference... */
102#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */
103/* fastest possible local call under MSVC */
104#define LOCAL(type) static __inline type __fastcall
105#elif defined(USE_INLINE)
106#define LOCAL(type) static inline type
107#else
108#define LOCAL(type) static type
109#endif
110
111/* error codes */
112#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */
113#define SRE_ERROR_STATE -2 /* illegal state */
114#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */
115#define SRE_ERROR_MEMORY -9 /* out of memory */
116#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */
117
118#if defined(VERBOSE)
119#define TRACE(v) printf v
120#else
121#define TRACE(v)
122#endif
123
124/* -------------------------------------------------------------------- */
125/* search engine state */
126
127/* default character predicates (run sre_chars.py to regenerate tables) */
128
129#define SRE_DIGIT_MASK 1
130#define SRE_SPACE_MASK 2
131#define SRE_LINEBREAK_MASK 4
132#define SRE_ALNUM_MASK 8
133#define SRE_WORD_MASK 16
134
135/* FIXME: this assumes ASCII.  create tables in init_sre() instead */
136
137static char sre_char_info[128] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
1382, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
1390, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
14025, 25, 0, 0, 0, 0, 0, 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
14124, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0,
1420, 0, 16, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
14324, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0, 0, 0, 0, 0 };
144
145static char sre_char_lower[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
14610, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
14727, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
14844, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
14961, 62, 63, 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
150108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
151122, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
152106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
153120, 121, 122, 123, 124, 125, 126, 127 };
154
155#define SRE_IS_DIGIT(ch)\
156    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_DIGIT_MASK) : 0)
157#define SRE_IS_SPACE(ch)\
158    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_SPACE_MASK) : 0)
159#define SRE_IS_LINEBREAK(ch)\
160    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_LINEBREAK_MASK) : 0)
161#define SRE_IS_ALNUM(ch)\
162    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_ALNUM_MASK) : 0)
163#define SRE_IS_WORD(ch)\
164    ((ch) < 128 ? (sre_char_info[(ch)] & SRE_WORD_MASK) : 0)
165
166static unsigned int sre_lower(unsigned int ch)
167{
168    return ((ch) < 128 ? (unsigned int)sre_char_lower[ch] : ch);
169}
170
171/* locale-specific character predicates */
172/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
173 * warnings when c's type supports only numbers < N+1 */
174#define SRE_LOC_IS_DIGIT(ch) (!((ch) & ~255) ? isdigit((ch)) : 0)
175#define SRE_LOC_IS_SPACE(ch) (!((ch) & ~255) ? isspace((ch)) : 0)
176#define SRE_LOC_IS_LINEBREAK(ch) ((ch) == '\n')
177#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? isalnum((ch)) : 0)
178#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_')
179
180static unsigned int sre_lower_locale(unsigned int ch)
181{
182    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch);
183}
184
185/* unicode-specific character predicates */
186
187#if defined(HAVE_UNICODE)
188
189#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL((Py_UNICODE)(ch))
190#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE((Py_UNICODE)(ch))
191#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK((Py_UNICODE)(ch))
192#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM((Py_UNICODE)(ch))
193#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM((ch)) || (ch) == '_')
194
195static unsigned int sre_lower_unicode(unsigned int ch)
196{
197    return (unsigned int) Py_UNICODE_TOLOWER((Py_UNICODE)(ch));
198}
199
200#endif
201
202LOCAL(int)
203sre_category(SRE_CODE category, unsigned int ch)
204{
205    switch (category) {
206
207    case SRE_CATEGORY_DIGIT:
208        return SRE_IS_DIGIT(ch);
209    case SRE_CATEGORY_NOT_DIGIT:
210        return !SRE_IS_DIGIT(ch);
211    case SRE_CATEGORY_SPACE:
212        return SRE_IS_SPACE(ch);
213    case SRE_CATEGORY_NOT_SPACE:
214        return !SRE_IS_SPACE(ch);
215    case SRE_CATEGORY_WORD:
216        return SRE_IS_WORD(ch);
217    case SRE_CATEGORY_NOT_WORD:
218        return !SRE_IS_WORD(ch);
219    case SRE_CATEGORY_LINEBREAK:
220        return SRE_IS_LINEBREAK(ch);
221    case SRE_CATEGORY_NOT_LINEBREAK:
222        return !SRE_IS_LINEBREAK(ch);
223
224    case SRE_CATEGORY_LOC_WORD:
225        return SRE_LOC_IS_WORD(ch);
226    case SRE_CATEGORY_LOC_NOT_WORD:
227        return !SRE_LOC_IS_WORD(ch);
228
229#if defined(HAVE_UNICODE)
230    case SRE_CATEGORY_UNI_DIGIT:
231        return SRE_UNI_IS_DIGIT(ch);
232    case SRE_CATEGORY_UNI_NOT_DIGIT:
233        return !SRE_UNI_IS_DIGIT(ch);
234    case SRE_CATEGORY_UNI_SPACE:
235        return SRE_UNI_IS_SPACE(ch);
236    case SRE_CATEGORY_UNI_NOT_SPACE:
237        return !SRE_UNI_IS_SPACE(ch);
238    case SRE_CATEGORY_UNI_WORD:
239        return SRE_UNI_IS_WORD(ch);
240    case SRE_CATEGORY_UNI_NOT_WORD:
241        return !SRE_UNI_IS_WORD(ch);
242    case SRE_CATEGORY_UNI_LINEBREAK:
243        return SRE_UNI_IS_LINEBREAK(ch);
244    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
245        return !SRE_UNI_IS_LINEBREAK(ch);
246#else
247    case SRE_CATEGORY_UNI_DIGIT:
248        return SRE_IS_DIGIT(ch);
249    case SRE_CATEGORY_UNI_NOT_DIGIT:
250        return !SRE_IS_DIGIT(ch);
251    case SRE_CATEGORY_UNI_SPACE:
252        return SRE_IS_SPACE(ch);
253    case SRE_CATEGORY_UNI_NOT_SPACE:
254        return !SRE_IS_SPACE(ch);
255    case SRE_CATEGORY_UNI_WORD:
256        return SRE_LOC_IS_WORD(ch);
257    case SRE_CATEGORY_UNI_NOT_WORD:
258        return !SRE_LOC_IS_WORD(ch);
259    case SRE_CATEGORY_UNI_LINEBREAK:
260        return SRE_IS_LINEBREAK(ch);
261    case SRE_CATEGORY_UNI_NOT_LINEBREAK:
262        return !SRE_IS_LINEBREAK(ch);
263#endif
264    }
265    return 0;
266}
267
268/* helpers */
269
270static void
271data_stack_dealloc(SRE_STATE* state)
272{
273    if (state->data_stack) {
274        PyMem_FREE(state->data_stack);
275        state->data_stack = NULL;
276    }
277    state->data_stack_size = state->data_stack_base = 0;
278}
279
280static int
281data_stack_grow(SRE_STATE* state, Py_ssize_t size)
282{
283    Py_ssize_t minsize, cursize;
284    minsize = state->data_stack_base+size;
285    cursize = state->data_stack_size;
286    if (cursize < minsize) {
287        void* stack;
288        cursize = minsize+minsize/4+1024;
289        TRACE(("allocate/grow stack %" PY_FORMAT_SIZE_T "d\n", cursize));
290        stack = PyMem_REALLOC(state->data_stack, cursize);
291        if (!stack) {
292            data_stack_dealloc(state);
293            return SRE_ERROR_MEMORY;
294        }
295        state->data_stack = (char *)stack;
296        state->data_stack_size = cursize;
297    }
298    return 0;
299}
300
301/* generate 8-bit version */
302
303#define SRE_CHAR unsigned char
304#define SRE_AT sre_at
305#define SRE_COUNT sre_count
306#define SRE_CHARSET sre_charset
307#define SRE_INFO sre_info
308#define SRE_MATCH sre_match
309#define SRE_MATCH_CONTEXT sre_match_context
310#define SRE_SEARCH sre_search
311#define SRE_LITERAL_TEMPLATE sre_literal_template
312
313#if defined(HAVE_UNICODE)
314
315#define SRE_RECURSIVE
316#include "_sre.c"
317#undef SRE_RECURSIVE
318
319#undef SRE_LITERAL_TEMPLATE
320#undef SRE_SEARCH
321#undef SRE_MATCH
322#undef SRE_MATCH_CONTEXT
323#undef SRE_INFO
324#undef SRE_CHARSET
325#undef SRE_COUNT
326#undef SRE_AT
327#undef SRE_CHAR
328
329/* generate 16-bit unicode version */
330
331#define SRE_CHAR Py_UNICODE
332#define SRE_AT sre_uat
333#define SRE_COUNT sre_ucount
334#define SRE_CHARSET sre_ucharset
335#define SRE_INFO sre_uinfo
336#define SRE_MATCH sre_umatch
337#define SRE_MATCH_CONTEXT sre_umatch_context
338#define SRE_SEARCH sre_usearch
339#define SRE_LITERAL_TEMPLATE sre_uliteral_template
340#endif
341
342#endif /* SRE_RECURSIVE */
343
344/* -------------------------------------------------------------------- */
345/* String matching engine */
346
347/* the following section is compiled twice, with different character
348   settings */
349
350LOCAL(int)
351SRE_AT(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
352{
353    /* check if pointer is at given position */
354
355    Py_ssize_t thisp, thatp;
356
357    switch (at) {
358
359    case SRE_AT_BEGINNING:
360    case SRE_AT_BEGINNING_STRING:
361        return ((void*) ptr == state->beginning);
362
363    case SRE_AT_BEGINNING_LINE:
364        return ((void*) ptr == state->beginning ||
365                SRE_IS_LINEBREAK((int) ptr[-1]));
366
367    case SRE_AT_END:
368        return (((void*) (ptr+1) == state->end &&
369                 SRE_IS_LINEBREAK((int) ptr[0])) ||
370                ((void*) ptr == state->end));
371
372    case SRE_AT_END_LINE:
373        return ((void*) ptr == state->end ||
374                SRE_IS_LINEBREAK((int) ptr[0]));
375
376    case SRE_AT_END_STRING:
377        return ((void*) ptr == state->end);
378
379    case SRE_AT_BOUNDARY:
380        if (state->beginning == state->end)
381            return 0;
382        thatp = ((void*) ptr > state->beginning) ?
383            SRE_IS_WORD((int) ptr[-1]) : 0;
384        thisp = ((void*) ptr < state->end) ?
385            SRE_IS_WORD((int) ptr[0]) : 0;
386        return thisp != thatp;
387
388    case SRE_AT_NON_BOUNDARY:
389        if (state->beginning == state->end)
390            return 0;
391        thatp = ((void*) ptr > state->beginning) ?
392            SRE_IS_WORD((int) ptr[-1]) : 0;
393        thisp = ((void*) ptr < state->end) ?
394            SRE_IS_WORD((int) ptr[0]) : 0;
395        return thisp == thatp;
396
397    case SRE_AT_LOC_BOUNDARY:
398        if (state->beginning == state->end)
399            return 0;
400        thatp = ((void*) ptr > state->beginning) ?
401            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
402        thisp = ((void*) ptr < state->end) ?
403            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
404        return thisp != thatp;
405
406    case SRE_AT_LOC_NON_BOUNDARY:
407        if (state->beginning == state->end)
408            return 0;
409        thatp = ((void*) ptr > state->beginning) ?
410            SRE_LOC_IS_WORD((int) ptr[-1]) : 0;
411        thisp = ((void*) ptr < state->end) ?
412            SRE_LOC_IS_WORD((int) ptr[0]) : 0;
413        return thisp == thatp;
414
415#if defined(HAVE_UNICODE)
416    case SRE_AT_UNI_BOUNDARY:
417        if (state->beginning == state->end)
418            return 0;
419        thatp = ((void*) ptr > state->beginning) ?
420            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
421        thisp = ((void*) ptr < state->end) ?
422            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
423        return thisp != thatp;
424
425    case SRE_AT_UNI_NON_BOUNDARY:
426        if (state->beginning == state->end)
427            return 0;
428        thatp = ((void*) ptr > state->beginning) ?
429            SRE_UNI_IS_WORD((int) ptr[-1]) : 0;
430        thisp = ((void*) ptr < state->end) ?
431            SRE_UNI_IS_WORD((int) ptr[0]) : 0;
432        return thisp == thatp;
433#endif
434
435    }
436
437    return 0;
438}
439
440LOCAL(int)
441SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
442{
443    /* check if character is a member of the given set */
444
445    int ok = 1;
446
447    for (;;) {
448        switch (*set++) {
449
450        case SRE_OP_FAILURE:
451            return !ok;
452
453        case SRE_OP_LITERAL:
454            /* <LITERAL> <code> */
455            if (ch == set[0])
456                return ok;
457            set++;
458            break;
459
460        case SRE_OP_CATEGORY:
461            /* <CATEGORY> <code> */
462            if (sre_category(set[0], (int) ch))
463                return ok;
464            set += 1;
465            break;
466
467        case SRE_OP_CHARSET:
468            if (sizeof(SRE_CODE) == 2) {
469                /* <CHARSET> <bitmap> (16 bits per code word) */
470                if (ch < 256 && (set[ch >> 4] & (1 << (ch & 15))))
471                    return ok;
472                set += 16;
473            }
474            else {
475                /* <CHARSET> <bitmap> (32 bits per code word) */
476                if (ch < 256 && (set[ch >> 5] & (1u << (ch & 31))))
477                    return ok;
478                set += 8;
479            }
480            break;
481
482        case SRE_OP_RANGE:
483            /* <RANGE> <lower> <upper> */
484            if (set[0] <= ch && ch <= set[1])
485                return ok;
486            set += 2;
487            break;
488
489        case SRE_OP_NEGATE:
490            ok = !ok;
491            break;
492
493        case SRE_OP_BIGCHARSET:
494            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
495        {
496            Py_ssize_t count, block;
497            count = *(set++);
498
499            if (sizeof(SRE_CODE) == 2) {
500                block = ((unsigned char*)set)[ch >> 8];
501                set += 128;
502                if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
503                    return ok;
504                set += count*16;
505            }
506            else {
507                /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
508                 * warnings when c's type supports only numbers < N+1 */
509                if (!(ch & ~65535))
510                    block = ((unsigned char*)set)[ch >> 8];
511                else
512                    block = -1;
513                set += 64;
514                if (block >=0 &&
515                    (set[block*8 + ((ch & 255)>>5)] & (1u << (ch & 31))))
516                    return ok;
517                set += count*8;
518            }
519            break;
520        }
521
522        default:
523            /* internal error -- there's not much we can do about it
524               here, so let's just pretend it didn't match... */
525            return 0;
526        }
527    }
528}
529
530LOCAL(Py_ssize_t) SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern);
531
532LOCAL(Py_ssize_t)
533SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
534{
535    SRE_CODE chr;
536    SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
537    SRE_CHAR* end = (SRE_CHAR *)state->end;
538    Py_ssize_t i;
539
540    /* adjust end */
541    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
542        end = ptr + maxcount;
543
544    switch (pattern[0]) {
545
546    case SRE_OP_IN:
547        /* repeated set */
548        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr));
549        while (ptr < end && SRE_CHARSET(pattern + 2, *ptr))
550            ptr++;
551        break;
552
553    case SRE_OP_ANY:
554        /* repeated dot wildcard. */
555        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr));
556        while (ptr < end && !SRE_IS_LINEBREAK(*ptr))
557            ptr++;
558        break;
559
560    case SRE_OP_ANY_ALL:
561        /* repeated dot wildcard.  skip to the end of the target
562           string, and backtrack from there */
563        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr));
564        ptr = end;
565        break;
566
567    case SRE_OP_LITERAL:
568        /* repeated literal */
569        chr = pattern[1];
570        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr));
571        while (ptr < end && (SRE_CODE) *ptr == chr)
572            ptr++;
573        break;
574
575    case SRE_OP_LITERAL_IGNORE:
576        /* repeated literal */
577        chr = pattern[1];
578        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
579        while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
580            ptr++;
581        break;
582
583    case SRE_OP_NOT_LITERAL:
584        /* repeated non-literal */
585        chr = pattern[1];
586        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr));
587        while (ptr < end && (SRE_CODE) *ptr != chr)
588            ptr++;
589        break;
590
591    case SRE_OP_NOT_LITERAL_IGNORE:
592        /* repeated non-literal */
593        chr = pattern[1];
594        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
595        while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
596            ptr++;
597        break;
598
599    default:
600        /* repeated single character pattern */
601        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr));
602        while ((SRE_CHAR*) state->ptr < end) {
603            i = SRE_MATCH(state, pattern);
604            if (i < 0)
605                return i;
606            if (!i)
607                break;
608        }
609        TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
610               (SRE_CHAR*) state->ptr - ptr));
611        return (SRE_CHAR*) state->ptr - ptr;
612    }
613
614    TRACE(("|%p|%p|COUNT %" PY_FORMAT_SIZE_T "d\n", pattern, ptr,
615           ptr - (SRE_CHAR*) state->ptr));
616    return ptr - (SRE_CHAR*) state->ptr;
617}
618
619#if 0 /* not used in this release */
620LOCAL(int)
621SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
622{
623    /* check if an SRE_OP_INFO block matches at the current position.
624       returns the number of SRE_CODE objects to skip if successful, 0
625       if no match */
626
627    SRE_CHAR* end = state->end;
628    SRE_CHAR* ptr = state->ptr;
629    Py_ssize_t i;
630
631    /* check minimal length */
632    if (pattern[3] && (end - ptr) < pattern[3])
633        return 0;
634
635    /* check known prefix */
636    if (pattern[2] & SRE_INFO_PREFIX && pattern[5] > 1) {
637        /* <length> <skip> <prefix data> <overlap data> */
638        for (i = 0; i < pattern[5]; i++)
639            if ((SRE_CODE) ptr[i] != pattern[7 + i])
640                return 0;
641        return pattern[0] + 2 * pattern[6];
642    }
643    return pattern[0];
644}
645#endif
646
647/* The macros below should be used to protect recursive SRE_MATCH()
648 * calls that *failed* and do *not* return immediately (IOW, those
649 * that will backtrack). Explaining:
650 *
651 * - Recursive SRE_MATCH() returned true: that's usually a success
652 *   (besides atypical cases like ASSERT_NOT), therefore there's no
653 *   reason to restore lastmark;
654 *
655 * - Recursive SRE_MATCH() returned false but the current SRE_MATCH()
656 *   is returning to the caller: If the current SRE_MATCH() is the
657 *   top function of the recursion, returning false will be a matching
658 *   failure, and it doesn't matter where lastmark is pointing to.
659 *   If it's *not* the top function, it will be a recursive SRE_MATCH()
660 *   failure by itself, and the calling SRE_MATCH() will have to deal
661 *   with the failure by the same rules explained here (it will restore
662 *   lastmark by itself if necessary);
663 *
664 * - Recursive SRE_MATCH() returned false, and will continue the
665 *   outside 'for' loop: must be protected when breaking, since the next
666 *   OP could potentially depend on lastmark;
667 *
668 * - Recursive SRE_MATCH() returned false, and will be called again
669 *   inside a local for/while loop: must be protected between each
670 *   loop iteration, since the recursive SRE_MATCH() could do anything,
671 *   and could potentially depend on lastmark.
672 *
673 * For more information, check the discussion at SF patch #712900.
674 */
675#define LASTMARK_SAVE()     \
676    do { \
677        ctx->lastmark = state->lastmark; \
678        ctx->lastindex = state->lastindex; \
679    } while (0)
680#define LASTMARK_RESTORE()  \
681    do { \
682        state->lastmark = ctx->lastmark; \
683        state->lastindex = ctx->lastindex; \
684    } while (0)
685
686#define RETURN_ERROR(i) do { return i; } while(0)
687#define RETURN_FAILURE do { ret = 0; goto exit; } while(0)
688#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0)
689
690#define RETURN_ON_ERROR(i) \
691    do { if (i < 0) RETURN_ERROR(i); } while (0)
692#define RETURN_ON_SUCCESS(i) \
693    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0)
694#define RETURN_ON_FAILURE(i) \
695    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0)
696
697#define SFY(x) #x
698
699#define DATA_STACK_ALLOC(state, type, ptr) \
700do { \
701    alloc_pos = state->data_stack_base; \
702    TRACE(("allocating %s in %" PY_FORMAT_SIZE_T "d " \
703           "(%" PY_FORMAT_SIZE_T "d)\n", \
704           SFY(type), alloc_pos, sizeof(type))); \
705    if (sizeof(type) > state->data_stack_size - alloc_pos) { \
706        int j = data_stack_grow(state, sizeof(type)); \
707        if (j < 0) return j; \
708        if (ctx_pos != -1) \
709            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
710    } \
711    ptr = (type*)(state->data_stack+alloc_pos); \
712    state->data_stack_base += sizeof(type); \
713} while (0)
714
715#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \
716do { \
717    TRACE(("looking up %s at %" PY_FORMAT_SIZE_T "d\n", SFY(type), pos)); \
718    ptr = (type*)(state->data_stack+pos); \
719} while (0)
720
721#define DATA_STACK_PUSH(state, data, size) \
722do { \
723    TRACE(("copy data in %p to %" PY_FORMAT_SIZE_T "d " \
724           "(%" PY_FORMAT_SIZE_T "d)\n", \
725           data, state->data_stack_base, size)); \
726    if (size > state->data_stack_size - state->data_stack_base) { \
727        int j = data_stack_grow(state, size); \
728        if (j < 0) return j; \
729        if (ctx_pos != -1) \
730            DATA_STACK_LOOKUP_AT(state, SRE_MATCH_CONTEXT, ctx, ctx_pos); \
731    } \
732    memcpy(state->data_stack+state->data_stack_base, data, size); \
733    state->data_stack_base += size; \
734} while (0)
735
736#define DATA_STACK_POP(state, data, size, discard) \
737do { \
738    TRACE(("copy data to %p from %" PY_FORMAT_SIZE_T "d " \
739           "(%" PY_FORMAT_SIZE_T "d)\n", \
740           data, state->data_stack_base-size, size)); \
741    memcpy(data, state->data_stack+state->data_stack_base-size, size); \
742    if (discard) \
743        state->data_stack_base -= size; \
744} while (0)
745
746#define DATA_STACK_POP_DISCARD(state, size) \
747do { \
748    TRACE(("discard data from %" PY_FORMAT_SIZE_T "d " \
749           "(%" PY_FORMAT_SIZE_T "d)\n", \
750           state->data_stack_base-size, size)); \
751    state->data_stack_base -= size; \
752} while(0)
753
754#define DATA_PUSH(x) \
755    DATA_STACK_PUSH(state, (x), sizeof(*(x)))
756#define DATA_POP(x) \
757    DATA_STACK_POP(state, (x), sizeof(*(x)), 1)
758#define DATA_POP_DISCARD(x) \
759    DATA_STACK_POP_DISCARD(state, sizeof(*(x)))
760#define DATA_ALLOC(t,p) \
761    DATA_STACK_ALLOC(state, t, p)
762#define DATA_LOOKUP_AT(t,p,pos) \
763    DATA_STACK_LOOKUP_AT(state,t,p,pos)
764
765#define MARK_PUSH(lastmark) \
766    do if (lastmark > 0) { \
767        i = lastmark; /* ctx->lastmark may change if reallocated */ \
768        DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
769    } while (0)
770#define MARK_POP(lastmark) \
771    do if (lastmark > 0) { \
772        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
773    } while (0)
774#define MARK_POP_KEEP(lastmark) \
775    do if (lastmark > 0) { \
776        DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
777    } while (0)
778#define MARK_POP_DISCARD(lastmark) \
779    do if (lastmark > 0) { \
780        DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
781    } while (0)
782
783#define JUMP_NONE            0
784#define JUMP_MAX_UNTIL_1     1
785#define JUMP_MAX_UNTIL_2     2
786#define JUMP_MAX_UNTIL_3     3
787#define JUMP_MIN_UNTIL_1     4
788#define JUMP_MIN_UNTIL_2     5
789#define JUMP_MIN_UNTIL_3     6
790#define JUMP_REPEAT          7
791#define JUMP_REPEAT_ONE_1    8
792#define JUMP_REPEAT_ONE_2    9
793#define JUMP_MIN_REPEAT_ONE  10
794#define JUMP_BRANCH          11
795#define JUMP_ASSERT          12
796#define JUMP_ASSERT_NOT      13
797
798#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \
799    DATA_ALLOC(SRE_MATCH_CONTEXT, nextctx); \
800    nextctx->last_ctx_pos = ctx_pos; \
801    nextctx->jump = jumpvalue; \
802    nextctx->pattern = nextpattern; \
803    ctx_pos = alloc_pos; \
804    ctx = nextctx; \
805    goto entrance; \
806    jumplabel: \
807    while (0) /* gcc doesn't like labels at end of scopes */ \
808
809typedef struct {
810    Py_ssize_t last_ctx_pos;
811    Py_ssize_t jump;
812    SRE_CHAR* ptr;
813    SRE_CODE* pattern;
814    Py_ssize_t count;
815    Py_ssize_t lastmark;
816    Py_ssize_t lastindex;
817    union {
818        SRE_CODE chr;
819        SRE_REPEAT* rep;
820    } u;
821} SRE_MATCH_CONTEXT;
822
823/* check if string matches the given pattern.  returns <0 for
824   error, 0 for failure, and 1 for success */
825LOCAL(Py_ssize_t)
826SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
827{
828    SRE_CHAR* end = (SRE_CHAR *)state->end;
829    Py_ssize_t alloc_pos, ctx_pos = -1;
830    Py_ssize_t i, ret = 0;
831    Py_ssize_t jump;
832    unsigned int sigcount=0;
833
834    SRE_MATCH_CONTEXT* ctx;
835    SRE_MATCH_CONTEXT* nextctx;
836
837    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr));
838
839    DATA_ALLOC(SRE_MATCH_CONTEXT, ctx);
840    ctx->last_ctx_pos = -1;
841    ctx->jump = JUMP_NONE;
842    ctx->pattern = pattern;
843    ctx_pos = alloc_pos;
844
845entrance:
846
847    ctx->ptr = (SRE_CHAR *)state->ptr;
848
849    if (ctx->pattern[0] == SRE_OP_INFO) {
850        /* optimization info block */
851        /* <INFO> <1=skip> <2=flags> <3=min> ... */
852        if (ctx->pattern[3] && (end - ctx->ptr) < ctx->pattern[3]) {
853            TRACE(("reject (got %" PY_FORMAT_SIZE_T "d chars, "
854                   "need %" PY_FORMAT_SIZE_T "d)\n",
855                   (end - ctx->ptr), (Py_ssize_t) ctx->pattern[3]));
856            RETURN_FAILURE;
857        }
858        ctx->pattern += ctx->pattern[1] + 1;
859    }
860
861    for (;;) {
862        ++sigcount;
863        if ((0 == (sigcount & 0xfff)) && PyErr_CheckSignals())
864            RETURN_ERROR(SRE_ERROR_INTERRUPTED);
865
866        switch (*ctx->pattern++) {
867
868        case SRE_OP_MARK:
869            /* set mark */
870            /* <MARK> <gid> */
871            TRACE(("|%p|%p|MARK %d\n", ctx->pattern,
872                   ctx->ptr, ctx->pattern[0]));
873            i = ctx->pattern[0];
874            if (i & 1)
875                state->lastindex = i/2 + 1;
876            if (i > state->lastmark) {
877                /* state->lastmark is the highest valid index in the
878                   state->mark array.  If it is increased by more than 1,
879                   the intervening marks must be set to NULL to signal
880                   that these marks have not been encountered. */
881                Py_ssize_t j = state->lastmark + 1;
882                while (j < i)
883                    state->mark[j++] = NULL;
884                state->lastmark = i;
885            }
886            state->mark[i] = ctx->ptr;
887            ctx->pattern++;
888            break;
889
890        case SRE_OP_LITERAL:
891            /* match literal string */
892            /* <LITERAL> <code> */
893            TRACE(("|%p|%p|LITERAL %d\n", ctx->pattern,
894                   ctx->ptr, *ctx->pattern));
895            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] != ctx->pattern[0])
896                RETURN_FAILURE;
897            ctx->pattern++;
898            ctx->ptr++;
899            break;
900
901        case SRE_OP_NOT_LITERAL:
902            /* match anything that is not literal character */
903            /* <NOT_LITERAL> <code> */
904            TRACE(("|%p|%p|NOT_LITERAL %d\n", ctx->pattern,
905                   ctx->ptr, *ctx->pattern));
906            if (ctx->ptr >= end || (SRE_CODE) ctx->ptr[0] == ctx->pattern[0])
907                RETURN_FAILURE;
908            ctx->pattern++;
909            ctx->ptr++;
910            break;
911
912        case SRE_OP_SUCCESS:
913            /* end of pattern */
914            TRACE(("|%p|%p|SUCCESS\n", ctx->pattern, ctx->ptr));
915            state->ptr = ctx->ptr;
916            RETURN_SUCCESS;
917
918        case SRE_OP_AT:
919            /* match at given position */
920            /* <AT> <code> */
921            TRACE(("|%p|%p|AT %d\n", ctx->pattern, ctx->ptr, *ctx->pattern));
922            if (!SRE_AT(state, ctx->ptr, *ctx->pattern))
923                RETURN_FAILURE;
924            ctx->pattern++;
925            break;
926
927        case SRE_OP_CATEGORY:
928            /* match at given category */
929            /* <CATEGORY> <code> */
930            TRACE(("|%p|%p|CATEGORY %d\n", ctx->pattern,
931                   ctx->ptr, *ctx->pattern));
932            if (ctx->ptr >= end || !sre_category(ctx->pattern[0], ctx->ptr[0]))
933                RETURN_FAILURE;
934            ctx->pattern++;
935            ctx->ptr++;
936            break;
937
938        case SRE_OP_ANY:
939            /* match anything (except a newline) */
940            /* <ANY> */
941            TRACE(("|%p|%p|ANY\n", ctx->pattern, ctx->ptr));
942            if (ctx->ptr >= end || SRE_IS_LINEBREAK(ctx->ptr[0]))
943                RETURN_FAILURE;
944            ctx->ptr++;
945            break;
946
947        case SRE_OP_ANY_ALL:
948            /* match anything */
949            /* <ANY_ALL> */
950            TRACE(("|%p|%p|ANY_ALL\n", ctx->pattern, ctx->ptr));
951            if (ctx->ptr >= end)
952                RETURN_FAILURE;
953            ctx->ptr++;
954            break;
955
956        case SRE_OP_IN:
957            /* match set member (or non_member) */
958            /* <IN> <skip> <set> */
959            TRACE(("|%p|%p|IN\n", ctx->pattern, ctx->ptr));
960            if (ctx->ptr >= end || !SRE_CHARSET(ctx->pattern + 1, *ctx->ptr))
961                RETURN_FAILURE;
962            ctx->pattern += ctx->pattern[0];
963            ctx->ptr++;
964            break;
965
966        case SRE_OP_LITERAL_IGNORE:
967            TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
968                   ctx->pattern, ctx->ptr, ctx->pattern[0]));
969            if (ctx->ptr >= end ||
970                state->lower(*ctx->ptr) != state->lower(*ctx->pattern))
971                RETURN_FAILURE;
972            ctx->pattern++;
973            ctx->ptr++;
974            break;
975
976        case SRE_OP_NOT_LITERAL_IGNORE:
977            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
978                   ctx->pattern, ctx->ptr, *ctx->pattern));
979            if (ctx->ptr >= end ||
980                state->lower(*ctx->ptr) == state->lower(*ctx->pattern))
981                RETURN_FAILURE;
982            ctx->pattern++;
983            ctx->ptr++;
984            break;
985
986        case SRE_OP_IN_IGNORE:
987            TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
988            if (ctx->ptr >= end
989                || !SRE_CHARSET(ctx->pattern+1,
990                                (SRE_CODE)state->lower(*ctx->ptr)))
991                RETURN_FAILURE;
992            ctx->pattern += ctx->pattern[0];
993            ctx->ptr++;
994            break;
995
996        case SRE_OP_JUMP:
997        case SRE_OP_INFO:
998            /* jump forward */
999            /* <JUMP> <offset> */
1000            TRACE(("|%p|%p|JUMP %d\n", ctx->pattern,
1001                   ctx->ptr, ctx->pattern[0]));
1002            ctx->pattern += ctx->pattern[0];
1003            break;
1004
1005        case SRE_OP_BRANCH:
1006            /* alternation */
1007            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */
1008            TRACE(("|%p|%p|BRANCH\n", ctx->pattern, ctx->ptr));
1009            LASTMARK_SAVE();
1010            ctx->u.rep = state->repeat;
1011            if (ctx->u.rep)
1012                MARK_PUSH(ctx->lastmark);
1013            for (; ctx->pattern[0]; ctx->pattern += ctx->pattern[0]) {
1014                if (ctx->pattern[1] == SRE_OP_LITERAL &&
1015                    (ctx->ptr >= end ||
1016                     (SRE_CODE) *ctx->ptr != ctx->pattern[2]))
1017                    continue;
1018                if (ctx->pattern[1] == SRE_OP_IN &&
1019                    (ctx->ptr >= end ||
1020                     !SRE_CHARSET(ctx->pattern + 3, (SRE_CODE) *ctx->ptr)))
1021                    continue;
1022                state->ptr = ctx->ptr;
1023                DO_JUMP(JUMP_BRANCH, jump_branch, ctx->pattern+1);
1024                if (ret) {
1025                    if (ctx->u.rep)
1026                        MARK_POP_DISCARD(ctx->lastmark);
1027                    RETURN_ON_ERROR(ret);
1028                    RETURN_SUCCESS;
1029                }
1030                if (ctx->u.rep)
1031                    MARK_POP_KEEP(ctx->lastmark);
1032                LASTMARK_RESTORE();
1033            }
1034            if (ctx->u.rep)
1035                MARK_POP_DISCARD(ctx->lastmark);
1036            RETURN_FAILURE;
1037
1038        case SRE_OP_REPEAT_ONE:
1039            /* match repeated sequence (maximizing regexp) */
1040
1041            /* this operator only works if the repeated item is
1042               exactly one character wide, and we're not already
1043               collecting backtracking points.  for other cases,
1044               use the MAX_REPEAT operator */
1045
1046            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1047
1048            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1049                   ctx->pattern[1], ctx->pattern[2]));
1050
1051            if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
1052                RETURN_FAILURE; /* cannot match */
1053
1054            state->ptr = ctx->ptr;
1055
1056            ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[2]);
1057            RETURN_ON_ERROR(ret);
1058            DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1059            ctx->count = ret;
1060            ctx->ptr += ctx->count;
1061
1062            /* when we arrive here, count contains the number of
1063               matches, and ctx->ptr points to the tail of the target
1064               string.  check if the rest of the pattern matches,
1065               and backtrack if not. */
1066
1067            if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1068                RETURN_FAILURE;
1069
1070            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1071                /* tail is empty.  we're finished */
1072                state->ptr = ctx->ptr;
1073                RETURN_SUCCESS;
1074            }
1075
1076            LASTMARK_SAVE();
1077
1078            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_LITERAL) {
1079                /* tail starts with a literal. skip positions where
1080                   the rest of the pattern cannot possibly match */
1081                ctx->u.chr = ctx->pattern[ctx->pattern[0]+1];
1082                for (;;) {
1083                    while (ctx->count >= (Py_ssize_t) ctx->pattern[1] &&
1084                           (ctx->ptr >= end || *ctx->ptr != ctx->u.chr)) {
1085                        ctx->ptr--;
1086                        ctx->count--;
1087                    }
1088                    if (ctx->count < (Py_ssize_t) ctx->pattern[1])
1089                        break;
1090                    state->ptr = ctx->ptr;
1091                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1,
1092                            ctx->pattern+ctx->pattern[0]);
1093                    if (ret) {
1094                        RETURN_ON_ERROR(ret);
1095                        RETURN_SUCCESS;
1096                    }
1097
1098                    LASTMARK_RESTORE();
1099
1100                    ctx->ptr--;
1101                    ctx->count--;
1102                }
1103
1104            } else {
1105                /* general case */
1106                while (ctx->count >= (Py_ssize_t) ctx->pattern[1]) {
1107                    state->ptr = ctx->ptr;
1108                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2,
1109                            ctx->pattern+ctx->pattern[0]);
1110                    if (ret) {
1111                        RETURN_ON_ERROR(ret);
1112                        RETURN_SUCCESS;
1113                    }
1114                    ctx->ptr--;
1115                    ctx->count--;
1116                    LASTMARK_RESTORE();
1117                }
1118            }
1119            RETURN_FAILURE;
1120
1121        case SRE_OP_MIN_REPEAT_ONE:
1122            /* match repeated sequence (minimizing regexp) */
1123
1124            /* this operator only works if the repeated item is
1125               exactly one character wide, and we're not already
1126               collecting backtracking points.  for other cases,
1127               use the MIN_REPEAT operator */
1128
1129            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
1130
1131            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", ctx->pattern, ctx->ptr,
1132                   ctx->pattern[1], ctx->pattern[2]));
1133
1134            if ((Py_ssize_t) ctx->pattern[1] > end - ctx->ptr)
1135                RETURN_FAILURE; /* cannot match */
1136
1137            state->ptr = ctx->ptr;
1138
1139            if (ctx->pattern[1] == 0)
1140                ctx->count = 0;
1141            else {
1142                /* count using pattern min as the maximum */
1143                ret = SRE_COUNT(state, ctx->pattern+3, ctx->pattern[1]);
1144                RETURN_ON_ERROR(ret);
1145                DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1146                if (ret < (Py_ssize_t) ctx->pattern[1])
1147                    /* didn't match minimum number of times */
1148                    RETURN_FAILURE;
1149                /* advance past minimum matches of repeat */
1150                ctx->count = ret;
1151                ctx->ptr += ctx->count;
1152            }
1153
1154            if (ctx->pattern[ctx->pattern[0]] == SRE_OP_SUCCESS) {
1155                /* tail is empty.  we're finished */
1156                state->ptr = ctx->ptr;
1157                RETURN_SUCCESS;
1158
1159            } else {
1160                /* general case */
1161                LASTMARK_SAVE();
1162                while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
1163                       || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
1164                    state->ptr = ctx->ptr;
1165                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
1166                            ctx->pattern+ctx->pattern[0]);
1167                    if (ret) {
1168                        RETURN_ON_ERROR(ret);
1169                        RETURN_SUCCESS;
1170                    }
1171                    state->ptr = ctx->ptr;
1172                    ret = SRE_COUNT(state, ctx->pattern+3, 1);
1173                    RETURN_ON_ERROR(ret);
1174                    DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1175                    if (ret == 0)
1176                        break;
1177                    assert(ret == 1);
1178                    ctx->ptr++;
1179                    ctx->count++;
1180                    LASTMARK_RESTORE();
1181                }
1182            }
1183            RETURN_FAILURE;
1184
1185        case SRE_OP_REPEAT:
1186            /* create repeat context.  all the hard work is done
1187               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */
1188            /* <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail */
1189            TRACE(("|%p|%p|REPEAT %d %d\n", ctx->pattern, ctx->ptr,
1190                   ctx->pattern[1], ctx->pattern[2]));
1191
1192            /* install new repeat context */
1193            ctx->u.rep = (SRE_REPEAT*) PyObject_MALLOC(sizeof(*ctx->u.rep));
1194            if (!ctx->u.rep) {
1195                PyErr_NoMemory();
1196                RETURN_FAILURE;
1197            }
1198            ctx->u.rep->count = -1;
1199            ctx->u.rep->pattern = ctx->pattern;
1200            ctx->u.rep->prev = state->repeat;
1201            ctx->u.rep->last_ptr = NULL;
1202            state->repeat = ctx->u.rep;
1203
1204            state->ptr = ctx->ptr;
1205            DO_JUMP(JUMP_REPEAT, jump_repeat, ctx->pattern+ctx->pattern[0]);
1206            state->repeat = ctx->u.rep->prev;
1207            PyObject_FREE(ctx->u.rep);
1208
1209            if (ret) {
1210                RETURN_ON_ERROR(ret);
1211                RETURN_SUCCESS;
1212            }
1213            RETURN_FAILURE;
1214
1215        case SRE_OP_MAX_UNTIL:
1216            /* maximizing repeat */
1217            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */
1218
1219            /* FIXME: we probably need to deal with zero-width
1220               matches in here... */
1221
1222            ctx->u.rep = state->repeat;
1223            if (!ctx->u.rep)
1224                RETURN_ERROR(SRE_ERROR_STATE);
1225
1226            state->ptr = ctx->ptr;
1227
1228            ctx->count = ctx->u.rep->count+1;
1229
1230            TRACE(("|%p|%p|MAX_UNTIL %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1231                   ctx->ptr, ctx->count));
1232
1233            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1234                /* not enough matches */
1235                ctx->u.rep->count = ctx->count;
1236                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1,
1237                        ctx->u.rep->pattern+3);
1238                if (ret) {
1239                    RETURN_ON_ERROR(ret);
1240                    RETURN_SUCCESS;
1241                }
1242                ctx->u.rep->count = ctx->count-1;
1243                state->ptr = ctx->ptr;
1244                RETURN_FAILURE;
1245            }
1246
1247            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] ||
1248                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
1249                state->ptr != ctx->u.rep->last_ptr) {
1250                /* we may have enough matches, but if we can
1251                   match another item, do so */
1252                ctx->u.rep->count = ctx->count;
1253                LASTMARK_SAVE();
1254                MARK_PUSH(ctx->lastmark);
1255                /* zero-width match protection */
1256                DATA_PUSH(&ctx->u.rep->last_ptr);
1257                ctx->u.rep->last_ptr = state->ptr;
1258                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2,
1259                        ctx->u.rep->pattern+3);
1260                DATA_POP(&ctx->u.rep->last_ptr);
1261                if (ret) {
1262                    MARK_POP_DISCARD(ctx->lastmark);
1263                    RETURN_ON_ERROR(ret);
1264                    RETURN_SUCCESS;
1265                }
1266                MARK_POP(ctx->lastmark);
1267                LASTMARK_RESTORE();
1268                ctx->u.rep->count = ctx->count-1;
1269                state->ptr = ctx->ptr;
1270            }
1271
1272            /* cannot match more repeated items here.  make sure the
1273               tail matches */
1274            state->repeat = ctx->u.rep->prev;
1275            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, ctx->pattern);
1276            RETURN_ON_SUCCESS(ret);
1277            state->repeat = ctx->u.rep;
1278            state->ptr = ctx->ptr;
1279            RETURN_FAILURE;
1280
1281        case SRE_OP_MIN_UNTIL:
1282            /* minimizing repeat */
1283            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */
1284
1285            ctx->u.rep = state->repeat;
1286            if (!ctx->u.rep)
1287                RETURN_ERROR(SRE_ERROR_STATE);
1288
1289            state->ptr = ctx->ptr;
1290
1291            ctx->count = ctx->u.rep->count+1;
1292
1293            TRACE(("|%p|%p|MIN_UNTIL %" PY_FORMAT_SIZE_T "d %p\n", ctx->pattern,
1294                   ctx->ptr, ctx->count, ctx->u.rep->pattern));
1295
1296            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) {
1297                /* not enough matches */
1298                ctx->u.rep->count = ctx->count;
1299                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1,
1300                        ctx->u.rep->pattern+3);
1301                if (ret) {
1302                    RETURN_ON_ERROR(ret);
1303                    RETURN_SUCCESS;
1304                }
1305                ctx->u.rep->count = ctx->count-1;
1306                state->ptr = ctx->ptr;
1307                RETURN_FAILURE;
1308            }
1309
1310            LASTMARK_SAVE();
1311
1312            /* see if the tail matches */
1313            state->repeat = ctx->u.rep->prev;
1314            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, ctx->pattern);
1315            if (ret) {
1316                RETURN_ON_ERROR(ret);
1317                RETURN_SUCCESS;
1318            }
1319
1320            state->repeat = ctx->u.rep;
1321            state->ptr = ctx->ptr;
1322
1323            LASTMARK_RESTORE();
1324
1325            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2]
1326                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) ||
1327                state->ptr == ctx->u.rep->last_ptr)
1328                RETURN_FAILURE;
1329
1330            ctx->u.rep->count = ctx->count;
1331            /* zero-width match protection */
1332            DATA_PUSH(&ctx->u.rep->last_ptr);
1333            ctx->u.rep->last_ptr = state->ptr;
1334            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3,
1335                    ctx->u.rep->pattern+3);
1336            DATA_POP(&ctx->u.rep->last_ptr);
1337            if (ret) {
1338                RETURN_ON_ERROR(ret);
1339                RETURN_SUCCESS;
1340            }
1341            ctx->u.rep->count = ctx->count-1;
1342            state->ptr = ctx->ptr;
1343            RETURN_FAILURE;
1344
1345        case SRE_OP_GROUPREF:
1346            /* match backreference */
1347            TRACE(("|%p|%p|GROUPREF %d\n", ctx->pattern,
1348                   ctx->ptr, ctx->pattern[0]));
1349            i = ctx->pattern[0];
1350            {
1351                Py_ssize_t groupref = i+i;
1352                if (groupref >= state->lastmark) {
1353                    RETURN_FAILURE;
1354                } else {
1355                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1356                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1357                    if (!p || !e || e < p)
1358                        RETURN_FAILURE;
1359                    while (p < e) {
1360                        if (ctx->ptr >= end || *ctx->ptr != *p)
1361                            RETURN_FAILURE;
1362                        p++; ctx->ptr++;
1363                    }
1364                }
1365            }
1366            ctx->pattern++;
1367            break;
1368
1369        case SRE_OP_GROUPREF_IGNORE:
1370            /* match backreference */
1371            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", ctx->pattern,
1372                   ctx->ptr, ctx->pattern[0]));
1373            i = ctx->pattern[0];
1374            {
1375                Py_ssize_t groupref = i+i;
1376                if (groupref >= state->lastmark) {
1377                    RETURN_FAILURE;
1378                } else {
1379                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1380                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1381                    if (!p || !e || e < p)
1382                        RETURN_FAILURE;
1383                    while (p < e) {
1384                        if (ctx->ptr >= end ||
1385                            state->lower(*ctx->ptr) != state->lower(*p))
1386                            RETURN_FAILURE;
1387                        p++; ctx->ptr++;
1388                    }
1389                }
1390            }
1391            ctx->pattern++;
1392            break;
1393
1394        case SRE_OP_GROUPREF_EXISTS:
1395            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", ctx->pattern,
1396                   ctx->ptr, ctx->pattern[0]));
1397            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
1398            i = ctx->pattern[0];
1399            {
1400                Py_ssize_t groupref = i+i;
1401                if (groupref >= state->lastmark) {
1402                    ctx->pattern += ctx->pattern[1];
1403                    break;
1404                } else {
1405                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
1406                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
1407                    if (!p || !e || e < p) {
1408                        ctx->pattern += ctx->pattern[1];
1409                        break;
1410                    }
1411                }
1412            }
1413            ctx->pattern += 2;
1414            break;
1415
1416        case SRE_OP_ASSERT:
1417            /* assert subpattern */
1418            /* <ASSERT> <skip> <back> <pattern> */
1419            TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
1420                   ctx->ptr, ctx->pattern[1]));
1421            state->ptr = ctx->ptr - ctx->pattern[1];
1422            if (state->ptr < state->beginning)
1423                RETURN_FAILURE;
1424            DO_JUMP(JUMP_ASSERT, jump_assert, ctx->pattern+2);
1425            RETURN_ON_FAILURE(ret);
1426            ctx->pattern += ctx->pattern[0];
1427            break;
1428
1429        case SRE_OP_ASSERT_NOT:
1430            /* assert not subpattern */
1431            /* <ASSERT_NOT> <skip> <back> <pattern> */
1432            TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
1433                   ctx->ptr, ctx->pattern[1]));
1434            state->ptr = ctx->ptr - ctx->pattern[1];
1435            if (state->ptr >= state->beginning) {
1436                DO_JUMP(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
1437                if (ret) {
1438                    RETURN_ON_ERROR(ret);
1439                    RETURN_FAILURE;
1440                }
1441            }
1442            ctx->pattern += ctx->pattern[0];
1443            break;
1444
1445        case SRE_OP_FAILURE:
1446            /* immediate failure */
1447            TRACE(("|%p|%p|FAILURE\n", ctx->pattern, ctx->ptr));
1448            RETURN_FAILURE;
1449
1450        default:
1451            TRACE(("|%p|%p|UNKNOWN %d\n", ctx->pattern, ctx->ptr,
1452                   ctx->pattern[-1]));
1453            RETURN_ERROR(SRE_ERROR_ILLEGAL);
1454        }
1455    }
1456
1457exit:
1458    ctx_pos = ctx->last_ctx_pos;
1459    jump = ctx->jump;
1460    DATA_POP_DISCARD(ctx);
1461    if (ctx_pos == -1)
1462        return ret;
1463    DATA_LOOKUP_AT(SRE_MATCH_CONTEXT, ctx, ctx_pos);
1464
1465    switch (jump) {
1466        case JUMP_MAX_UNTIL_2:
1467            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", ctx->pattern, ctx->ptr));
1468            goto jump_max_until_2;
1469        case JUMP_MAX_UNTIL_3:
1470            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", ctx->pattern, ctx->ptr));
1471            goto jump_max_until_3;
1472        case JUMP_MIN_UNTIL_2:
1473            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", ctx->pattern, ctx->ptr));
1474            goto jump_min_until_2;
1475        case JUMP_MIN_UNTIL_3:
1476            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", ctx->pattern, ctx->ptr));
1477            goto jump_min_until_3;
1478        case JUMP_BRANCH:
1479            TRACE(("|%p|%p|JUMP_BRANCH\n", ctx->pattern, ctx->ptr));
1480            goto jump_branch;
1481        case JUMP_MAX_UNTIL_1:
1482            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", ctx->pattern, ctx->ptr));
1483            goto jump_max_until_1;
1484        case JUMP_MIN_UNTIL_1:
1485            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", ctx->pattern, ctx->ptr));
1486            goto jump_min_until_1;
1487        case JUMP_REPEAT:
1488            TRACE(("|%p|%p|JUMP_REPEAT\n", ctx->pattern, ctx->ptr));
1489            goto jump_repeat;
1490        case JUMP_REPEAT_ONE_1:
1491            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", ctx->pattern, ctx->ptr));
1492            goto jump_repeat_one_1;
1493        case JUMP_REPEAT_ONE_2:
1494            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", ctx->pattern, ctx->ptr));
1495            goto jump_repeat_one_2;
1496        case JUMP_MIN_REPEAT_ONE:
1497            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", ctx->pattern, ctx->ptr));
1498            goto jump_min_repeat_one;
1499        case JUMP_ASSERT:
1500            TRACE(("|%p|%p|JUMP_ASSERT\n", ctx->pattern, ctx->ptr));
1501            goto jump_assert;
1502        case JUMP_ASSERT_NOT:
1503            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", ctx->pattern, ctx->ptr));
1504            goto jump_assert_not;
1505        case JUMP_NONE:
1506            TRACE(("|%p|%p|RETURN %" PY_FORMAT_SIZE_T "d\n", ctx->pattern,
1507                   ctx->ptr, ret));
1508            break;
1509    }
1510
1511    return ret; /* should never get here */
1512}
1513
1514LOCAL(Py_ssize_t)
1515SRE_SEARCH(SRE_STATE* state, SRE_CODE* pattern)
1516{
1517    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
1518    SRE_CHAR* end = (SRE_CHAR *)state->end;
1519    Py_ssize_t status = 0;
1520    Py_ssize_t prefix_len = 0;
1521    Py_ssize_t prefix_skip = 0;
1522    SRE_CODE* prefix = NULL;
1523    SRE_CODE* charset = NULL;
1524    SRE_CODE* overlap = NULL;
1525    int flags = 0;
1526
1527    if (pattern[0] == SRE_OP_INFO) {
1528        /* optimization info block */
1529        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */
1530
1531        flags = pattern[2];
1532
1533        if (pattern[3] > 1) {
1534            /* adjust end point (but make sure we leave at least one
1535               character in there, so literal search will work) */
1536            end -= pattern[3]-1;
1537            if (end <= ptr)
1538                end = ptr+1;
1539        }
1540
1541        if (flags & SRE_INFO_PREFIX) {
1542            /* pattern starts with a known prefix */
1543            /* <length> <skip> <prefix data> <overlap data> */
1544            prefix_len = pattern[5];
1545            prefix_skip = pattern[6];
1546            prefix = pattern + 7;
1547            overlap = prefix + prefix_len - 1;
1548        } else if (flags & SRE_INFO_CHARSET)
1549            /* pattern starts with a character from a known set */
1550            /* <charset> */
1551            charset = pattern + 5;
1552
1553        pattern += 1 + pattern[1];
1554    }
1555
1556    TRACE(("prefix = %p %" PY_FORMAT_SIZE_T "d %" PY_FORMAT_SIZE_T "d\n",
1557           prefix, prefix_len, prefix_skip));
1558    TRACE(("charset = %p\n", charset));
1559
1560#if defined(USE_FAST_SEARCH)
1561    if (prefix_len > 1) {
1562        /* pattern starts with a known prefix.  use the overlap
1563           table to skip forward as fast as we possibly can */
1564        Py_ssize_t i = 0;
1565        end = (SRE_CHAR *)state->end;
1566        while (ptr < end) {
1567            for (;;) {
1568                if ((SRE_CODE) ptr[0] != prefix[i]) {
1569                    if (!i)
1570                        break;
1571                    else
1572                        i = overlap[i];
1573                } else {
1574                    if (++i == prefix_len) {
1575                        /* found a potential match */
1576                        TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr));
1577                        state->start = ptr + 1 - prefix_len;
1578                        state->ptr = ptr + 1 - prefix_len + prefix_skip;
1579                        if (flags & SRE_INFO_LITERAL)
1580                            return 1; /* we got all of it */
1581                        status = SRE_MATCH(state, pattern + 2*prefix_skip);
1582                        if (status != 0)
1583                            return status;
1584                        /* close but no cigar -- try again */
1585                        i = overlap[i];
1586                    }
1587                    break;
1588                }
1589            }
1590            ptr++;
1591        }
1592        return 0;
1593    }
1594#endif
1595
1596    if (pattern[0] == SRE_OP_LITERAL) {
1597        /* pattern starts with a literal character.  this is used
1598           for short prefixes, and if fast search is disabled */
1599        SRE_CODE chr = pattern[1];
1600        end = (SRE_CHAR *)state->end;
1601        for (;;) {
1602            while (ptr < end && (SRE_CODE) ptr[0] != chr)
1603                ptr++;
1604            if (ptr >= end)
1605                return 0;
1606            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr));
1607            state->start = ptr;
1608            state->ptr = ++ptr;
1609            if (flags & SRE_INFO_LITERAL)
1610                return 1; /* we got all of it */
1611            status = SRE_MATCH(state, pattern + 2);
1612            if (status != 0)
1613                break;
1614        }
1615    } else if (charset) {
1616        /* pattern starts with a character from a known set */
1617        end = (SRE_CHAR *)state->end;
1618        for (;;) {
1619            while (ptr < end && !SRE_CHARSET(charset, ptr[0]))
1620                ptr++;
1621            if (ptr >= end)
1622                return 0;
1623            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr));
1624            state->start = ptr;
1625            state->ptr = ptr;
1626            status = SRE_MATCH(state, pattern);
1627            if (status != 0)
1628                break;
1629            ptr++;
1630        }
1631    } else
1632        /* general case */
1633        while (ptr <= end) {
1634            TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1635            state->start = state->ptr = ptr++;
1636            status = SRE_MATCH(state, pattern);
1637            if (status != 0)
1638                break;
1639        }
1640
1641    return status;
1642}
1643
1644LOCAL(int)
1645SRE_LITERAL_TEMPLATE(SRE_CHAR* ptr, Py_ssize_t len)
1646{
1647    /* check if given string is a literal template (i.e. no escapes) */
1648    while (len-- > 0)
1649        if (*ptr++ == '\\')
1650            return 0;
1651    return 1;
1652}
1653
1654#if !defined(SRE_RECURSIVE)
1655
1656/* -------------------------------------------------------------------- */
1657/* factories and destructors */
1658
1659/* see sre.h for object declarations */
1660static PyObject*pattern_new_match(PatternObject*, SRE_STATE*, int);
1661static PyObject*pattern_scanner(PatternObject*, PyObject*);
1662
1663static PyObject *
1664sre_codesize(PyObject* self, PyObject *unused)
1665{
1666    return PyInt_FromSize_t(sizeof(SRE_CODE));
1667}
1668
1669static PyObject *
1670sre_getlower(PyObject* self, PyObject* args)
1671{
1672    int character, flags;
1673    if (!PyArg_ParseTuple(args, "ii", &character, &flags))
1674        return NULL;
1675    if (flags & SRE_FLAG_LOCALE)
1676        return Py_BuildValue("i", sre_lower_locale(character));
1677    if (flags & SRE_FLAG_UNICODE)
1678#if defined(HAVE_UNICODE)
1679        return Py_BuildValue("i", sre_lower_unicode(character));
1680#else
1681        return Py_BuildValue("i", sre_lower_locale(character));
1682#endif
1683    return Py_BuildValue("i", sre_lower(character));
1684}
1685
1686LOCAL(void)
1687state_reset(SRE_STATE* state)
1688{
1689    /* FIXME: dynamic! */
1690    /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/
1691
1692    state->lastmark = -1;
1693    state->lastindex = -1;
1694
1695    state->repeat = NULL;
1696
1697    data_stack_dealloc(state);
1698}
1699
1700static void*
1701getstring(PyObject* string, Py_ssize_t* p_length, int* p_charsize)
1702{
1703    /* given a python object, return a data pointer, a length (in
1704       characters), and a character size.  return NULL if the object
1705       is not a string (or not compatible) */
1706
1707    PyBufferProcs *buffer;
1708    Py_ssize_t size, bytes;
1709    int charsize;
1710    void* ptr;
1711
1712#if defined(HAVE_UNICODE)
1713    if (PyUnicode_Check(string)) {
1714        /* unicode strings doesn't always support the buffer interface */
1715        ptr = (void*) PyUnicode_AS_DATA(string);
1716        /* bytes = PyUnicode_GET_DATA_SIZE(string); */
1717        size = PyUnicode_GET_SIZE(string);
1718        charsize = sizeof(Py_UNICODE);
1719
1720    } else {
1721#endif
1722
1723    /* get pointer to string buffer */
1724    buffer = Py_TYPE(string)->tp_as_buffer;
1725    if (!buffer || !buffer->bf_getreadbuffer || !buffer->bf_getsegcount ||
1726        buffer->bf_getsegcount(string, NULL) != 1) {
1727        PyErr_SetString(PyExc_TypeError, "expected string or buffer");
1728        return NULL;
1729    }
1730
1731    /* determine buffer size */
1732    bytes = buffer->bf_getreadbuffer(string, 0, &ptr);
1733    if (bytes < 0) {
1734        PyErr_SetString(PyExc_TypeError, "buffer has negative size");
1735        return NULL;
1736    }
1737
1738    /* determine character size */
1739#if PY_VERSION_HEX >= 0x01060000
1740    size = PyObject_Size(string);
1741#else
1742    size = PyObject_Length(string);
1743#endif
1744
1745    if (PyString_Check(string) || bytes == size)
1746        charsize = 1;
1747#if defined(HAVE_UNICODE)
1748    else if (bytes == (Py_ssize_t) (size * sizeof(Py_UNICODE)))
1749        charsize = sizeof(Py_UNICODE);
1750#endif
1751    else {
1752        PyErr_SetString(PyExc_TypeError, "buffer size mismatch");
1753        return NULL;
1754    }
1755
1756#if defined(HAVE_UNICODE)
1757    }
1758#endif
1759
1760    *p_length = size;
1761    *p_charsize = charsize;
1762
1763    return ptr;
1764}
1765
1766LOCAL(PyObject*)
1767state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
1768           Py_ssize_t start, Py_ssize_t end)
1769{
1770    /* prepare state object */
1771
1772    Py_ssize_t length;
1773    int charsize;
1774    void* ptr;
1775
1776    memset(state, 0, sizeof(SRE_STATE));
1777
1778    state->lastmark = -1;
1779    state->lastindex = -1;
1780
1781    ptr = getstring(string, &length, &charsize);
1782    if (!ptr)
1783        return NULL;
1784
1785    /* adjust boundaries */
1786    if (start < 0)
1787        start = 0;
1788    else if (start > length)
1789        start = length;
1790
1791    if (end < 0)
1792        end = 0;
1793    else if (end > length)
1794        end = length;
1795
1796    state->charsize = charsize;
1797
1798    state->beginning = ptr;
1799
1800    state->start = (void*) ((char*) ptr + start * state->charsize);
1801    state->end = (void*) ((char*) ptr + end * state->charsize);
1802
1803    Py_INCREF(string);
1804    state->string = string;
1805    state->pos = start;
1806    state->endpos = end;
1807
1808    if (pattern->flags & SRE_FLAG_LOCALE)
1809        state->lower = sre_lower_locale;
1810    else if (pattern->flags & SRE_FLAG_UNICODE)
1811#if defined(HAVE_UNICODE)
1812        state->lower = sre_lower_unicode;
1813#else
1814        state->lower = sre_lower_locale;
1815#endif
1816    else
1817        state->lower = sre_lower;
1818
1819    return string;
1820}
1821
1822LOCAL(void)
1823state_fini(SRE_STATE* state)
1824{
1825    Py_XDECREF(state->string);
1826    data_stack_dealloc(state);
1827}
1828
1829/* calculate offset from start of string */
1830#define STATE_OFFSET(state, member)\
1831    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize)
1832
1833LOCAL(PyObject*)
1834state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty)
1835{
1836    Py_ssize_t i, j;
1837
1838    index = (index - 1) * 2;
1839
1840    if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) {
1841        if (empty)
1842            /* want empty string */
1843            i = j = 0;
1844        else {
1845            Py_INCREF(Py_None);
1846            return Py_None;
1847        }
1848    } else {
1849        i = STATE_OFFSET(state, state->mark[index]);
1850        j = STATE_OFFSET(state, state->mark[index+1]);
1851    }
1852
1853    return PySequence_GetSlice(string, i, j);
1854}
1855
1856static void
1857pattern_error(int status)
1858{
1859    switch (status) {
1860    case SRE_ERROR_RECURSION_LIMIT:
1861        PyErr_SetString(
1862            PyExc_RuntimeError,
1863            "maximum recursion limit exceeded"
1864            );
1865        break;
1866    case SRE_ERROR_MEMORY:
1867        PyErr_NoMemory();
1868        break;
1869    case SRE_ERROR_INTERRUPTED:
1870    /* An exception has already been raised, so let it fly */
1871        break;
1872    default:
1873        /* other error codes indicate compiler/engine bugs */
1874        PyErr_SetString(
1875            PyExc_RuntimeError,
1876            "internal error in regular expression engine"
1877            );
1878    }
1879}
1880
1881static void
1882pattern_dealloc(PatternObject* self)
1883{
1884    if (self->weakreflist != NULL)
1885        PyObject_ClearWeakRefs((PyObject *) self);
1886    Py_XDECREF(self->pattern);
1887    Py_XDECREF(self->groupindex);
1888    Py_XDECREF(self->indexgroup);
1889    PyObject_DEL(self);
1890}
1891
1892static int
1893check_args_size(const char *name, PyObject* args, PyObject* kw, int n)
1894{
1895    Py_ssize_t m = PyTuple_GET_SIZE(args) + (kw ? PyDict_Size(kw) : 0);
1896    if (m <= n)
1897        return 1;
1898    PyErr_Format(PyExc_TypeError,
1899                 "%s() takes at most %d positional arguments (%zd given)",
1900                 name, n, m);
1901    return 0;
1902}
1903
1904static PyObject*
1905fix_string_param(PyObject *string, PyObject *string2, const char *oldname)
1906{
1907    if (string2 != NULL) {
1908        char buf[100];
1909        if (string != NULL) {
1910            PyErr_Format(PyExc_TypeError,
1911                         "Argument given by name ('%s') and position (1)",
1912                         oldname);
1913            return NULL;
1914        }
1915        sprintf(buf, "The '%s' keyword parameter name is deprecated.  "
1916                     "Use 'string' instead.", oldname);
1917        if (PyErr_Warn(PyExc_DeprecationWarning, buf) < 0)
1918            return NULL;
1919        return string2;
1920    }
1921    if (string == NULL) {
1922        PyErr_SetString(PyExc_TypeError,
1923                        "Required argument 'string' (pos 1) not found");
1924        return NULL;
1925    }
1926    return string;
1927}
1928
1929static PyObject*
1930pattern_match(PatternObject* self, PyObject* args, PyObject* kw)
1931{
1932    SRE_STATE state;
1933    int status;
1934
1935    PyObject *string = NULL, *string2 = NULL;
1936    Py_ssize_t start = 0;
1937    Py_ssize_t end = PY_SSIZE_T_MAX;
1938    static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
1939    if (!check_args_size("match", args, kw, 3))
1940        return NULL;
1941
1942    if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:match", kwlist,
1943                                     &string, &start, &end, &string2))
1944        return NULL;
1945
1946    string = fix_string_param(string, string2, "pattern");
1947    if (!string)
1948        return NULL;
1949
1950    string = state_init(&state, self, string, start, end);
1951    if (!string)
1952        return NULL;
1953
1954    state.ptr = state.start;
1955
1956    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr));
1957
1958    if (state.charsize == 1) {
1959        status = sre_match(&state, PatternObject_GetCode(self));
1960    } else {
1961#if defined(HAVE_UNICODE)
1962        status = sre_umatch(&state, PatternObject_GetCode(self));
1963#endif
1964    }
1965
1966    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
1967    if (PyErr_Occurred())
1968        return NULL;
1969
1970    state_fini(&state);
1971
1972    return pattern_new_match(self, &state, status);
1973}
1974
1975static PyObject*
1976pattern_search(PatternObject* self, PyObject* args, PyObject* kw)
1977{
1978    SRE_STATE state;
1979    int status;
1980
1981    PyObject *string = NULL, *string2 = NULL;
1982    Py_ssize_t start = 0;
1983    Py_ssize_t end = PY_SSIZE_T_MAX;
1984    static char* kwlist[] = { "string", "pos", "endpos", "pattern", NULL };
1985    if (!check_args_size("search", args, kw, 3))
1986        return NULL;
1987
1988    if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:search", kwlist,
1989                                     &string, &start, &end, &string2))
1990        return NULL;
1991
1992    string = fix_string_param(string, string2, "pattern");
1993    if (!string)
1994        return NULL;
1995
1996    string = state_init(&state, self, string, start, end);
1997    if (!string)
1998        return NULL;
1999
2000    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr));
2001
2002    if (state.charsize == 1) {
2003        status = sre_search(&state, PatternObject_GetCode(self));
2004    } else {
2005#if defined(HAVE_UNICODE)
2006        status = sre_usearch(&state, PatternObject_GetCode(self));
2007#endif
2008    }
2009
2010    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr));
2011
2012    state_fini(&state);
2013
2014    if (PyErr_Occurred())
2015        return NULL;
2016
2017    return pattern_new_match(self, &state, status);
2018}
2019
2020static PyObject*
2021call(char* module, char* function, PyObject* args)
2022{
2023    PyObject* name;
2024    PyObject* mod;
2025    PyObject* func;
2026    PyObject* result;
2027
2028    if (!args)
2029        return NULL;
2030    name = PyString_FromString(module);
2031    if (!name)
2032        return NULL;
2033    mod = PyImport_Import(name);
2034    Py_DECREF(name);
2035    if (!mod)
2036        return NULL;
2037    func = PyObject_GetAttrString(mod, function);
2038    Py_DECREF(mod);
2039    if (!func)
2040        return NULL;
2041    result = PyObject_CallObject(func, args);
2042    Py_DECREF(func);
2043    Py_DECREF(args);
2044    return result;
2045}
2046
2047#ifdef USE_BUILTIN_COPY
2048static int
2049deepcopy(PyObject** object, PyObject* memo)
2050{
2051    PyObject* copy;
2052
2053    copy = call(
2054        "copy", "deepcopy",
2055        PyTuple_Pack(2, *object, memo)
2056        );
2057    if (!copy)
2058        return 0;
2059
2060    Py_DECREF(*object);
2061    *object = copy;
2062
2063    return 1; /* success */
2064}
2065#endif
2066
2067static PyObject*
2068join_list(PyObject* list, PyObject* string)
2069{
2070    /* join list elements */
2071
2072    PyObject* joiner;
2073#if PY_VERSION_HEX >= 0x01060000
2074    PyObject* function;
2075    PyObject* args;
2076#endif
2077    PyObject* result;
2078
2079    joiner = PySequence_GetSlice(string, 0, 0);
2080    if (!joiner)
2081        return NULL;
2082
2083    if (PyList_GET_SIZE(list) == 0) {
2084        Py_DECREF(list);
2085        return joiner;
2086    }
2087
2088#if PY_VERSION_HEX >= 0x01060000
2089    function = PyObject_GetAttrString(joiner, "join");
2090    if (!function) {
2091        Py_DECREF(joiner);
2092        return NULL;
2093    }
2094    args = PyTuple_New(1);
2095    if (!args) {
2096        Py_DECREF(function);
2097        Py_DECREF(joiner);
2098        return NULL;
2099    }
2100    PyTuple_SET_ITEM(args, 0, list);
2101    result = PyObject_CallObject(function, args);
2102    Py_DECREF(args); /* also removes list */
2103    Py_DECREF(function);
2104#else
2105    result = call(
2106        "string", "join",
2107        PyTuple_Pack(2, list, joiner)
2108        );
2109#endif
2110    Py_DECREF(joiner);
2111
2112    return result;
2113}
2114
2115static PyObject*
2116pattern_findall(PatternObject* self, PyObject* args, PyObject* kw)
2117{
2118    SRE_STATE state;
2119    PyObject* list;
2120    int status;
2121    Py_ssize_t i, b, e;
2122
2123    PyObject *string = NULL, *string2 = NULL;
2124    Py_ssize_t start = 0;
2125    Py_ssize_t end = PY_SSIZE_T_MAX;
2126    static char* kwlist[] = { "string", "pos", "endpos", "source", NULL };
2127    if (!check_args_size("findall", args, kw, 3))
2128        return NULL;
2129
2130    if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnnO:findall", kwlist,
2131                                     &string, &start, &end, &string2))
2132        return NULL;
2133
2134    string = fix_string_param(string, string2, "source");
2135    if (!string)
2136        return NULL;
2137
2138    string = state_init(&state, self, string, start, end);
2139    if (!string)
2140        return NULL;
2141
2142    list = PyList_New(0);
2143    if (!list) {
2144        state_fini(&state);
2145        return NULL;
2146    }
2147
2148    while (state.start <= state.end) {
2149
2150        PyObject* item;
2151
2152        state_reset(&state);
2153
2154        state.ptr = state.start;
2155
2156        if (state.charsize == 1) {
2157            status = sre_search(&state, PatternObject_GetCode(self));
2158        } else {
2159#if defined(HAVE_UNICODE)
2160            status = sre_usearch(&state, PatternObject_GetCode(self));
2161#endif
2162        }
2163
2164  if (PyErr_Occurred())
2165      goto error;
2166
2167        if (status <= 0) {
2168            if (status == 0)
2169                break;
2170            pattern_error(status);
2171            goto error;
2172        }
2173
2174        /* don't bother to build a match object */
2175        switch (self->groups) {
2176        case 0:
2177            b = STATE_OFFSET(&state, state.start);
2178            e = STATE_OFFSET(&state, state.ptr);
2179            item = PySequence_GetSlice(string, b, e);
2180            if (!item)
2181                goto error;
2182            break;
2183        case 1:
2184            item = state_getslice(&state, 1, string, 1);
2185            if (!item)
2186                goto error;
2187            break;
2188        default:
2189            item = PyTuple_New(self->groups);
2190            if (!item)
2191                goto error;
2192            for (i = 0; i < self->groups; i++) {
2193                PyObject* o = state_getslice(&state, i+1, string, 1);
2194                if (!o) {
2195                    Py_DECREF(item);
2196                    goto error;
2197                }
2198                PyTuple_SET_ITEM(item, i, o);
2199            }
2200            break;
2201        }
2202
2203        status = PyList_Append(list, item);
2204        Py_DECREF(item);
2205        if (status < 0)
2206            goto error;
2207
2208        if (state.ptr == state.start)
2209            state.start = (void*) ((char*) state.ptr + state.charsize);
2210        else
2211            state.start = state.ptr;
2212
2213    }
2214
2215    state_fini(&state);
2216    return list;
2217
2218error:
2219    Py_DECREF(list);
2220    state_fini(&state);
2221    return NULL;
2222
2223}
2224
2225#if PY_VERSION_HEX >= 0x02020000
2226static PyObject*
2227pattern_finditer(PatternObject* pattern, PyObject* args)
2228{
2229    PyObject* scanner;
2230    PyObject* search;
2231    PyObject* iterator;
2232
2233    scanner = pattern_scanner(pattern, args);
2234    if (!scanner)
2235        return NULL;
2236
2237    search = PyObject_GetAttrString(scanner, "search");
2238    Py_DECREF(scanner);
2239    if (!search)
2240        return NULL;
2241
2242    iterator = PyCallIter_New(search, Py_None);
2243    Py_DECREF(search);
2244
2245    return iterator;
2246}
2247#endif
2248
2249static PyObject*
2250pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
2251{
2252    SRE_STATE state;
2253    PyObject* list;
2254    PyObject* item;
2255    int status;
2256    Py_ssize_t n;
2257    Py_ssize_t i;
2258    void* last;
2259
2260    PyObject *string = NULL, *string2 = NULL;
2261    Py_ssize_t maxsplit = 0;
2262    static char* kwlist[] = { "string", "maxsplit", "source", NULL };
2263    if (!check_args_size("split", args, kw, 2))
2264        return NULL;
2265
2266    if (!PyArg_ParseTupleAndKeywords(args, kw, "|OnO:split", kwlist,
2267                                     &string, &maxsplit, &string2))
2268        return NULL;
2269
2270    string = fix_string_param(string, string2, "source");
2271    if (!string)
2272        return NULL;
2273
2274    string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
2275    if (!string)
2276        return NULL;
2277
2278    list = PyList_New(0);
2279    if (!list) {
2280        state_fini(&state);
2281        return NULL;
2282    }
2283
2284    n = 0;
2285    last = state.start;
2286
2287    while (!maxsplit || n < maxsplit) {
2288
2289        state_reset(&state);
2290
2291        state.ptr = state.start;
2292
2293        if (state.charsize == 1) {
2294            status = sre_search(&state, PatternObject_GetCode(self));
2295        } else {
2296#if defined(HAVE_UNICODE)
2297            status = sre_usearch(&state, PatternObject_GetCode(self));
2298#endif
2299        }
2300
2301  if (PyErr_Occurred())
2302      goto error;
2303
2304        if (status <= 0) {
2305            if (status == 0)
2306                break;
2307            pattern_error(status);
2308            goto error;
2309        }
2310
2311        if (state.start == state.ptr) {
2312            if (last == state.end)
2313                break;
2314            /* skip one character */
2315            state.start = (void*) ((char*) state.ptr + state.charsize);
2316            continue;
2317        }
2318
2319        /* get segment before this match */
2320        item = PySequence_GetSlice(
2321            string, STATE_OFFSET(&state, last),
2322            STATE_OFFSET(&state, state.start)
2323            );
2324        if (!item)
2325            goto error;
2326        status = PyList_Append(list, item);
2327        Py_DECREF(item);
2328        if (status < 0)
2329            goto error;
2330
2331        /* add groups (if any) */
2332        for (i = 0; i < self->groups; i++) {
2333            item = state_getslice(&state, i+1, string, 0);
2334            if (!item)
2335                goto error;
2336            status = PyList_Append(list, item);
2337            Py_DECREF(item);
2338            if (status < 0)
2339                goto error;
2340        }
2341
2342        n = n + 1;
2343
2344        last = state.start = state.ptr;
2345
2346    }
2347
2348    /* get segment following last match (even if empty) */
2349    item = PySequence_GetSlice(
2350        string, STATE_OFFSET(&state, last), state.endpos
2351        );
2352    if (!item)
2353        goto error;
2354    status = PyList_Append(list, item);
2355    Py_DECREF(item);
2356    if (status < 0)
2357        goto error;
2358
2359    state_fini(&state);
2360    return list;
2361
2362error:
2363    Py_DECREF(list);
2364    state_fini(&state);
2365    return NULL;
2366
2367}
2368
2369static PyObject*
2370pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
2371             Py_ssize_t count, Py_ssize_t subn)
2372{
2373    SRE_STATE state;
2374    PyObject* list;
2375    PyObject* item;
2376    PyObject* filter;
2377    PyObject* args;
2378    PyObject* match;
2379    void* ptr;
2380    int status;
2381    Py_ssize_t n;
2382    Py_ssize_t i, b, e;
2383    int bint;
2384    int filter_is_callable;
2385
2386    if (PyCallable_Check(ptemplate)) {
2387        /* sub/subn takes either a function or a template */
2388        filter = ptemplate;
2389        Py_INCREF(filter);
2390        filter_is_callable = 1;
2391    } else {
2392        /* if not callable, check if it's a literal string */
2393        int literal;
2394        ptr = getstring(ptemplate, &n, &bint);
2395        b = bint;
2396        if (ptr) {
2397            if (b == 1) {
2398        literal = sre_literal_template((unsigned char *)ptr, n);
2399            } else {
2400#if defined(HAVE_UNICODE)
2401        literal = sre_uliteral_template((Py_UNICODE *)ptr, n);
2402#endif
2403            }
2404        } else {
2405            PyErr_Clear();
2406            literal = 0;
2407        }
2408        if (literal) {
2409            filter = ptemplate;
2410            Py_INCREF(filter);
2411            filter_is_callable = 0;
2412        } else {
2413            /* not a literal; hand it over to the template compiler */
2414            filter = call(
2415                SRE_PY_MODULE, "_subx",
2416                PyTuple_Pack(2, self, ptemplate)
2417                );
2418            if (!filter)
2419                return NULL;
2420            filter_is_callable = PyCallable_Check(filter);
2421        }
2422    }
2423
2424    string = state_init(&state, self, string, 0, PY_SSIZE_T_MAX);
2425    if (!string) {
2426        Py_DECREF(filter);
2427        return NULL;
2428    }
2429
2430    list = PyList_New(0);
2431    if (!list) {
2432        Py_DECREF(filter);
2433        state_fini(&state);
2434        return NULL;
2435    }
2436
2437    n = i = 0;
2438
2439    while (!count || n < count) {
2440
2441        state_reset(&state);
2442
2443        state.ptr = state.start;
2444
2445        if (state.charsize == 1) {
2446            status = sre_search(&state, PatternObject_GetCode(self));
2447        } else {
2448#if defined(HAVE_UNICODE)
2449            status = sre_usearch(&state, PatternObject_GetCode(self));
2450#endif
2451        }
2452
2453  if (PyErr_Occurred())
2454      goto error;
2455
2456        if (status <= 0) {
2457            if (status == 0)
2458                break;
2459            pattern_error(status);
2460            goto error;
2461        }
2462
2463        b = STATE_OFFSET(&state, state.start);
2464        e = STATE_OFFSET(&state, state.ptr);
2465
2466        if (i < b) {
2467            /* get segment before this match */
2468            item = PySequence_GetSlice(string, i, b);
2469            if (!item)
2470                goto error;
2471            status = PyList_Append(list, item);
2472            Py_DECREF(item);
2473            if (status < 0)
2474                goto error;
2475
2476        } else if (i == b && i == e && n > 0)
2477            /* ignore empty match on latest position */
2478            goto next;
2479
2480        if (filter_is_callable) {
2481            /* pass match object through filter */
2482            match = pattern_new_match(self, &state, 1);
2483            if (!match)
2484                goto error;
2485            args = PyTuple_Pack(1, match);
2486            if (!args) {
2487                Py_DECREF(match);
2488                goto error;
2489            }
2490            item = PyObject_CallObject(filter, args);
2491            Py_DECREF(args);
2492            Py_DECREF(match);
2493            if (!item)
2494                goto error;
2495        } else {
2496            /* filter is literal string */
2497            item = filter;
2498            Py_INCREF(item);
2499        }
2500
2501        /* add to list */
2502        if (item != Py_None) {
2503            status = PyList_Append(list, item);
2504            Py_DECREF(item);
2505            if (status < 0)
2506                goto error;
2507        }
2508
2509        i = e;
2510        n = n + 1;
2511
2512next:
2513        /* move on */
2514        if (state.ptr == state.start)
2515            state.start = (void*) ((char*) state.ptr + state.charsize);
2516        else
2517            state.start = state.ptr;
2518
2519    }
2520
2521    /* get segment following last match */
2522    if (i < state.endpos) {
2523        item = PySequence_GetSlice(string, i, state.endpos);
2524        if (!item)
2525            goto error;
2526        status = PyList_Append(list, item);
2527        Py_DECREF(item);
2528        if (status < 0)
2529            goto error;
2530    }
2531
2532    state_fini(&state);
2533
2534    Py_DECREF(filter);
2535
2536    /* convert list to single string (also removes list) */
2537    item = join_list(list, string);
2538
2539    if (!item)
2540        return NULL;
2541
2542    if (subn)
2543        return Py_BuildValue("Nn", item, n);
2544
2545    return item;
2546
2547error:
2548    Py_DECREF(list);
2549    state_fini(&state);
2550    Py_DECREF(filter);
2551    return NULL;
2552
2553}
2554
2555static PyObject*
2556pattern_sub(PatternObject* self, PyObject* args, PyObject* kw)
2557{
2558    PyObject* ptemplate;
2559    PyObject* string;
2560    Py_ssize_t count = 0;
2561    static char* kwlist[] = { "repl", "string", "count", NULL };
2562    if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:sub", kwlist,
2563                                     &ptemplate, &string, &count))
2564        return NULL;
2565
2566    return pattern_subx(self, ptemplate, string, count, 0);
2567}
2568
2569static PyObject*
2570pattern_subn(PatternObject* self, PyObject* args, PyObject* kw)
2571{
2572    PyObject* ptemplate;
2573    PyObject* string;
2574    Py_ssize_t count = 0;
2575    static char* kwlist[] = { "repl", "string", "count", NULL };
2576    if (!PyArg_ParseTupleAndKeywords(args, kw, "OO|n:subn", kwlist,
2577                                     &ptemplate, &string, &count))
2578        return NULL;
2579
2580    return pattern_subx(self, ptemplate, string, count, 1);
2581}
2582
2583static PyObject*
2584pattern_copy(PatternObject* self, PyObject *unused)
2585{
2586#ifdef USE_BUILTIN_COPY
2587    PatternObject* copy;
2588    int offset;
2589
2590    copy = PyObject_NEW_VAR(PatternObject, &Pattern_Type, self->codesize);
2591    if (!copy)
2592        return NULL;
2593
2594    offset = offsetof(PatternObject, groups);
2595
2596    Py_XINCREF(self->groupindex);
2597    Py_XINCREF(self->indexgroup);
2598    Py_XINCREF(self->pattern);
2599
2600    memcpy((char*) copy + offset, (char*) self + offset,
2601           sizeof(PatternObject) + self->codesize * sizeof(SRE_CODE) - offset);
2602    copy->weakreflist = NULL;
2603
2604    return (PyObject*) copy;
2605#else
2606    PyErr_SetString(PyExc_TypeError, "cannot copy this pattern object");
2607    return NULL;
2608#endif
2609}
2610
2611static PyObject*
2612pattern_deepcopy(PatternObject* self, PyObject* memo)
2613{
2614#ifdef USE_BUILTIN_COPY
2615    PatternObject* copy;
2616
2617    copy = (PatternObject*) pattern_copy(self);
2618    if (!copy)
2619        return NULL;
2620
2621    if (!deepcopy(&copy->groupindex, memo) ||
2622        !deepcopy(&copy->indexgroup, memo) ||
2623        !deepcopy(&copy->pattern, memo)) {
2624        Py_DECREF(copy);
2625        return NULL;
2626    }
2627
2628#else
2629    PyErr_SetString(PyExc_TypeError, "cannot deepcopy this pattern object");
2630    return NULL;
2631#endif
2632}
2633
2634PyDoc_STRVAR(pattern_match_doc,
2635"match(string[, pos[, endpos]]) --> match object or None.\n\
2636    Matches zero or more characters at the beginning of the string");
2637
2638PyDoc_STRVAR(pattern_search_doc,
2639"search(string[, pos[, endpos]]) --> match object or None.\n\
2640    Scan through string looking for a match, and return a corresponding\n\
2641    match object instance. Return None if no position in the string matches.");
2642
2643PyDoc_STRVAR(pattern_split_doc,
2644"split(string[, maxsplit = 0])  --> list.\n\
2645    Split string by the occurrences of pattern.");
2646
2647PyDoc_STRVAR(pattern_findall_doc,
2648"findall(string[, pos[, endpos]]) --> list.\n\
2649   Return a list of all non-overlapping matches of pattern in string.");
2650
2651PyDoc_STRVAR(pattern_finditer_doc,
2652"finditer(string[, pos[, endpos]]) --> iterator.\n\
2653    Return an iterator over all non-overlapping matches for the \n\
2654    RE pattern in string. For each match, the iterator returns a\n\
2655    match object.");
2656
2657PyDoc_STRVAR(pattern_sub_doc,
2658"sub(repl, string[, count = 0]) --> newstring\n\
2659    Return the string obtained by replacing the leftmost non-overlapping\n\
2660    occurrences of pattern in string by the replacement repl.");
2661
2662PyDoc_STRVAR(pattern_subn_doc,
2663"subn(repl, string[, count = 0]) --> (newstring, number of subs)\n\
2664    Return the tuple (new_string, number_of_subs_made) found by replacing\n\
2665    the leftmost non-overlapping occurrences of pattern with the\n\
2666    replacement repl.");
2667
2668PyDoc_STRVAR(pattern_doc, "Compiled regular expression objects");
2669
2670static PyMethodDef pattern_methods[] = {
2671    {"match", (PyCFunction) pattern_match, METH_VARARGS|METH_KEYWORDS,
2672  pattern_match_doc},
2673    {"search", (PyCFunction) pattern_search, METH_VARARGS|METH_KEYWORDS,
2674  pattern_search_doc},
2675    {"sub", (PyCFunction) pattern_sub, METH_VARARGS|METH_KEYWORDS,
2676  pattern_sub_doc},
2677    {"subn", (PyCFunction) pattern_subn, METH_VARARGS|METH_KEYWORDS,
2678  pattern_subn_doc},
2679    {"split", (PyCFunction) pattern_split, METH_VARARGS|METH_KEYWORDS,
2680  pattern_split_doc},
2681    {"findall", (PyCFunction) pattern_findall, METH_VARARGS|METH_KEYWORDS,
2682  pattern_findall_doc},
2683#if PY_VERSION_HEX >= 0x02020000
2684    {"finditer", (PyCFunction) pattern_finditer, METH_VARARGS,
2685  pattern_finditer_doc},
2686#endif
2687    {"scanner", (PyCFunction) pattern_scanner, METH_VARARGS},
2688    {"__copy__", (PyCFunction) pattern_copy, METH_NOARGS},
2689    {"__deepcopy__", (PyCFunction) pattern_deepcopy, METH_O},
2690    {NULL, NULL}
2691};
2692
2693#define PAT_OFF(x) offsetof(PatternObject, x)
2694static PyMemberDef pattern_members[] = {
2695    {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY},
2696    {"flags",      T_INT,       PAT_OFF(flags),         READONLY},
2697    {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY},
2698    {"groupindex", T_OBJECT,    PAT_OFF(groupindex),    READONLY},
2699    {NULL}  /* Sentinel */
2700};
2701
2702statichere PyTypeObject Pattern_Type = {
2703    PyObject_HEAD_INIT(NULL)
2704    0, "_" SRE_MODULE ".SRE_Pattern",
2705    sizeof(PatternObject), sizeof(SRE_CODE),
2706    (destructor)pattern_dealloc, /*tp_dealloc*/
2707    0,                                  /* tp_print */
2708    0,                                  /* tp_getattrn */
2709    0,          /* tp_setattr */
2710    0,          /* tp_compare */
2711    0,          /* tp_repr */
2712    0,          /* tp_as_number */
2713    0,          /* tp_as_sequence */
2714    0,          /* tp_as_mapping */
2715    0,          /* tp_hash */
2716    0,          /* tp_call */
2717    0,          /* tp_str */
2718    0,          /* tp_getattro */
2719    0,          /* tp_setattro */
2720    0,          /* tp_as_buffer */
2721    Py_TPFLAGS_DEFAULT,           /* tp_flags */
2722    pattern_doc,      /* tp_doc */
2723    0,          /* tp_traverse */
2724    0,          /* tp_clear */
2725    0,          /* tp_richcompare */
2726    offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
2727    0,          /* tp_iter */
2728    0,          /* tp_iternext */
2729    pattern_methods,      /* tp_methods */
2730    pattern_members,      /* tp_members */
2731};
2732
2733static int _validate(PatternObject *self); /* Forward */
2734
2735static PyObject *
2736_compile(PyObject* self_, PyObject* args)
2737{
2738    /* "compile" pattern descriptor to pattern object */
2739
2740    PatternObject* self;
2741    Py_ssize_t i, n;
2742
2743    PyObject* pattern;
2744    int flags = 0;
2745    PyObject* code;
2746    Py_ssize_t groups = 0;
2747    PyObject* groupindex = NULL;
2748    PyObject* indexgroup = NULL;
2749    if (!PyArg_ParseTuple(args, "OiO!|nOO", &pattern, &flags,
2750                          &PyList_Type, &code, &groups,
2751                          &groupindex, &indexgroup))
2752        return NULL;
2753
2754    n = PyList_GET_SIZE(code);
2755    /* coverity[ampersand_in_size] */
2756    self = PyObject_NEW_VAR(PatternObject, &Pattern_Type, n);
2757    if (!self)
2758        return NULL;
2759    self->weakreflist = NULL;
2760    self->pattern = NULL;
2761    self->groupindex = NULL;
2762    self->indexgroup = NULL;
2763
2764    self->codesize = n;
2765
2766    for (i = 0; i < n; i++) {
2767        PyObject *o = PyList_GET_ITEM(code, i);
2768        unsigned long value = PyInt_Check(o) ? (unsigned long)PyInt_AsLong(o)
2769                                              : PyLong_AsUnsignedLong(o);
2770        if (value == (unsigned long)-1 && PyErr_Occurred()) {
2771            if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
2772                PyErr_SetString(PyExc_OverflowError,
2773                                "regular expression code size limit exceeded");
2774            }
2775            break;
2776        }
2777        self->code[i] = (SRE_CODE) value;
2778        if ((unsigned long) self->code[i] != value) {
2779            PyErr_SetString(PyExc_OverflowError,
2780                            "regular expression code size limit exceeded");
2781            break;
2782        }
2783    }
2784
2785    if (PyErr_Occurred()) {
2786        Py_DECREF(self);
2787        return NULL;
2788    }
2789
2790    Py_INCREF(pattern);
2791    self->pattern = pattern;
2792
2793    self->flags = flags;
2794
2795    self->groups = groups;
2796
2797    Py_XINCREF(groupindex);
2798    self->groupindex = groupindex;
2799
2800    Py_XINCREF(indexgroup);
2801    self->indexgroup = indexgroup;
2802
2803    self->weakreflist = NULL;
2804
2805    if (!_validate(self)) {
2806        Py_DECREF(self);
2807        return NULL;
2808    }
2809
2810    return (PyObject*) self;
2811}
2812
2813/* -------------------------------------------------------------------- */
2814/* Code validation */
2815
2816/* To learn more about this code, have a look at the _compile() function in
2817   Lib/sre_compile.py.  The validation functions below checks the code array
2818   for conformance with the code patterns generated there.
2819
2820   The nice thing about the generated code is that it is position-independent:
2821   all jumps are relative jumps forward.  Also, jumps don't cross each other:
2822   the target of a later jump is always earlier than the target of an earlier
2823   jump.  IOW, this is okay:
2824
2825   J---------J-------T--------T
2826    \         \_____/        /
2827     \______________________/
2828
2829   but this is not:
2830
2831   J---------J-------T--------T
2832    \_________\_____/        /
2833               \____________/
2834
2835   It also helps that SRE_CODE is always an unsigned type.
2836*/
2837
2838/* Defining this one enables tracing of the validator */
2839#undef VVERBOSE
2840
2841/* Trace macro for the validator */
2842#if defined(VVERBOSE)
2843#define VTRACE(v) printf v
2844#else
2845#define VTRACE(v) do {} while(0)  /* do nothing */
2846#endif
2847
2848/* Report failure */
2849#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
2850
2851/* Extract opcode, argument, or skip count from code array */
2852#define GET_OP                                          \
2853    do {                                                \
2854        VTRACE(("%p: ", code));                         \
2855        if (code >= end) FAIL;                          \
2856        op = *code++;                                   \
2857        VTRACE(("%lu (op)\n", (unsigned long)op));      \
2858    } while (0)
2859#define GET_ARG                                         \
2860    do {                                                \
2861        VTRACE(("%p= ", code));                         \
2862        if (code >= end) FAIL;                          \
2863        arg = *code++;                                  \
2864        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
2865    } while (0)
2866#define GET_SKIP_ADJ(adj)                               \
2867    do {                                                \
2868        VTRACE(("%p= ", code));                         \
2869        if (code >= end) FAIL;                          \
2870        skip = *code;                                   \
2871        VTRACE(("%lu (skip to %p)\n",                   \
2872               (unsigned long)skip, code+skip));        \
2873        if (skip-adj > end-code)                        \
2874            FAIL;                                       \
2875        code++;                                         \
2876    } while (0)
2877#define GET_SKIP GET_SKIP_ADJ(0)
2878
2879static int
2880_validate_charset(SRE_CODE *code, SRE_CODE *end)
2881{
2882    /* Some variables are manipulated by the macros above */
2883    SRE_CODE op;
2884    SRE_CODE arg;
2885    SRE_CODE offset;
2886    int i;
2887
2888    while (code < end) {
2889        GET_OP;
2890        switch (op) {
2891
2892        case SRE_OP_NEGATE:
2893            break;
2894
2895        case SRE_OP_LITERAL:
2896            GET_ARG;
2897            break;
2898
2899        case SRE_OP_RANGE:
2900            GET_ARG;
2901            GET_ARG;
2902            break;
2903
2904        case SRE_OP_CHARSET:
2905            offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
2906            if (offset > end-code)
2907                FAIL;
2908            code += offset;
2909            break;
2910
2911        case SRE_OP_BIGCHARSET:
2912            GET_ARG; /* Number of blocks */
2913            offset = 256/sizeof(SRE_CODE); /* 256-byte table */
2914            if (offset > end-code)
2915                FAIL;
2916            /* Make sure that each byte points to a valid block */
2917            for (i = 0; i < 256; i++) {
2918                if (((unsigned char *)code)[i] >= arg)
2919                    FAIL;
2920            }
2921            code += offset;
2922            offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
2923            if (offset > end-code)
2924                FAIL;
2925            code += offset;
2926            break;
2927
2928        case SRE_OP_CATEGORY:
2929            GET_ARG;
2930            switch (arg) {
2931            case SRE_CATEGORY_DIGIT:
2932            case SRE_CATEGORY_NOT_DIGIT:
2933            case SRE_CATEGORY_SPACE:
2934            case SRE_CATEGORY_NOT_SPACE:
2935            case SRE_CATEGORY_WORD:
2936            case SRE_CATEGORY_NOT_WORD:
2937            case SRE_CATEGORY_LINEBREAK:
2938            case SRE_CATEGORY_NOT_LINEBREAK:
2939            case SRE_CATEGORY_LOC_WORD:
2940            case SRE_CATEGORY_LOC_NOT_WORD:
2941            case SRE_CATEGORY_UNI_DIGIT:
2942            case SRE_CATEGORY_UNI_NOT_DIGIT:
2943            case SRE_CATEGORY_UNI_SPACE:
2944            case SRE_CATEGORY_UNI_NOT_SPACE:
2945            case SRE_CATEGORY_UNI_WORD:
2946            case SRE_CATEGORY_UNI_NOT_WORD:
2947            case SRE_CATEGORY_UNI_LINEBREAK:
2948            case SRE_CATEGORY_UNI_NOT_LINEBREAK:
2949                break;
2950            default:
2951                FAIL;
2952            }
2953            break;
2954
2955        default:
2956            FAIL;
2957
2958        }
2959    }
2960
2961    return 1;
2962}
2963
2964static int
2965_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
2966{
2967    /* Some variables are manipulated by the macros above */
2968    SRE_CODE op;
2969    SRE_CODE arg;
2970    SRE_CODE skip;
2971
2972    VTRACE(("code=%p, end=%p\n", code, end));
2973
2974    if (code > end)
2975        FAIL;
2976
2977    while (code < end) {
2978        GET_OP;
2979        switch (op) {
2980
2981        case SRE_OP_MARK:
2982            /* We don't check whether marks are properly nested; the
2983               sre_match() code is robust even if they don't, and the worst
2984               you can get is nonsensical match results. */
2985            GET_ARG;
2986            if (arg > 2*groups+1) {
2987                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
2988                FAIL;
2989            }
2990            break;
2991
2992        case SRE_OP_LITERAL:
2993        case SRE_OP_NOT_LITERAL:
2994        case SRE_OP_LITERAL_IGNORE:
2995        case SRE_OP_NOT_LITERAL_IGNORE:
2996            GET_ARG;
2997            /* The arg is just a character, nothing to check */
2998            break;
2999
3000        case SRE_OP_SUCCESS:
3001        case SRE_OP_FAILURE:
3002            /* Nothing to check; these normally end the matching process */
3003            break;
3004
3005        case SRE_OP_AT:
3006            GET_ARG;
3007            switch (arg) {
3008            case SRE_AT_BEGINNING:
3009            case SRE_AT_BEGINNING_STRING:
3010            case SRE_AT_BEGINNING_LINE:
3011            case SRE_AT_END:
3012            case SRE_AT_END_LINE:
3013            case SRE_AT_END_STRING:
3014            case SRE_AT_BOUNDARY:
3015            case SRE_AT_NON_BOUNDARY:
3016            case SRE_AT_LOC_BOUNDARY:
3017            case SRE_AT_LOC_NON_BOUNDARY:
3018            case SRE_AT_UNI_BOUNDARY:
3019            case SRE_AT_UNI_NON_BOUNDARY:
3020                break;
3021            default:
3022                FAIL;
3023            }
3024            break;
3025
3026        case SRE_OP_ANY:
3027        case SRE_OP_ANY_ALL:
3028            /* These have no operands */
3029            break;
3030
3031        case SRE_OP_IN:
3032        case SRE_OP_IN_IGNORE:
3033            GET_SKIP;
3034            /* Stop 1 before the end; we check the FAILURE below */
3035            if (!_validate_charset(code, code+skip-2))
3036                FAIL;
3037            if (code[skip-2] != SRE_OP_FAILURE)
3038                FAIL;
3039            code += skip-1;
3040            break;
3041
3042        case SRE_OP_INFO:
3043            {
3044                /* A minimal info field is
3045                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
3046                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
3047                   more follows. */
3048                SRE_CODE flags, i;
3049                SRE_CODE *newcode;
3050                GET_SKIP;
3051                newcode = code+skip-1;
3052                GET_ARG; flags = arg;
3053                GET_ARG; /* min */
3054                GET_ARG; /* max */
3055                /* Check that only valid flags are present */
3056                if ((flags & ~(SRE_INFO_PREFIX |
3057                               SRE_INFO_LITERAL |
3058                               SRE_INFO_CHARSET)) != 0)
3059                    FAIL;
3060                /* PREFIX and CHARSET are mutually exclusive */
3061                if ((flags & SRE_INFO_PREFIX) &&
3062                    (flags & SRE_INFO_CHARSET))
3063                    FAIL;
3064                /* LITERAL implies PREFIX */
3065                if ((flags & SRE_INFO_LITERAL) &&
3066                    !(flags & SRE_INFO_PREFIX))
3067                    FAIL;
3068                /* Validate the prefix */
3069                if (flags & SRE_INFO_PREFIX) {
3070                    SRE_CODE prefix_len;
3071                    GET_ARG; prefix_len = arg;
3072                    GET_ARG; /* prefix skip */
3073                    /* Here comes the prefix string */
3074                    if (prefix_len > newcode-code)
3075                        FAIL;
3076                    code += prefix_len;
3077                    /* And here comes the overlap table */
3078                    if (prefix_len > newcode-code)
3079                        FAIL;
3080                    /* Each overlap value should be < prefix_len */
3081                    for (i = 0; i < prefix_len; i++) {
3082                        if (code[i] >= prefix_len)
3083                            FAIL;
3084                    }
3085                    code += prefix_len;
3086                }
3087                /* Validate the charset */
3088                if (flags & SRE_INFO_CHARSET) {
3089                    if (!_validate_charset(code, newcode-1))
3090                        FAIL;
3091                    if (newcode[-1] != SRE_OP_FAILURE)
3092                        FAIL;
3093                    code = newcode;
3094                }
3095                else if (code != newcode) {
3096                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
3097                    FAIL;
3098                }
3099            }
3100            break;
3101
3102        case SRE_OP_BRANCH:
3103            {
3104                SRE_CODE *target = NULL;
3105                for (;;) {
3106                    GET_SKIP;
3107                    if (skip == 0)
3108                        break;
3109                    /* Stop 2 before the end; we check the JUMP below */
3110                    if (!_validate_inner(code, code+skip-3, groups))
3111                        FAIL;
3112                    code += skip-3;
3113                    /* Check that it ends with a JUMP, and that each JUMP
3114                       has the same target */
3115                    GET_OP;
3116                    if (op != SRE_OP_JUMP)
3117                        FAIL;
3118                    GET_SKIP;
3119                    if (target == NULL)
3120                        target = code+skip-1;
3121                    else if (code+skip-1 != target)
3122                        FAIL;
3123                }
3124            }
3125            break;
3126
3127        case SRE_OP_REPEAT_ONE:
3128        case SRE_OP_MIN_REPEAT_ONE:
3129            {
3130                SRE_CODE min, max;
3131                GET_SKIP;
3132                GET_ARG; min = arg;
3133                GET_ARG; max = arg;
3134                if (min > max)
3135                    FAIL;
3136                if (max > SRE_MAXREPEAT)
3137                    FAIL;
3138                if (!_validate_inner(code, code+skip-4, groups))
3139                    FAIL;
3140                code += skip-4;
3141                GET_OP;
3142                if (op != SRE_OP_SUCCESS)
3143                    FAIL;
3144            }
3145            break;
3146
3147        case SRE_OP_REPEAT:
3148            {
3149                SRE_CODE min, max;
3150                GET_SKIP;
3151                GET_ARG; min = arg;
3152                GET_ARG; max = arg;
3153                if (min > max)
3154                    FAIL;
3155                if (max > SRE_MAXREPEAT)
3156                    FAIL;
3157                if (!_validate_inner(code, code+skip-3, groups))
3158                    FAIL;
3159                code += skip-3;
3160                GET_OP;
3161                if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
3162                    FAIL;
3163            }
3164            break;
3165
3166        case SRE_OP_GROUPREF:
3167        case SRE_OP_GROUPREF_IGNORE:
3168            GET_ARG;
3169            if (arg >= groups)
3170                FAIL;
3171            break;
3172
3173        case SRE_OP_GROUPREF_EXISTS:
3174            /* The regex syntax for this is: '(?(group)then|else)', where
3175               'group' is either an integer group number or a group name,
3176               'then' and 'else' are sub-regexes, and 'else' is optional. */
3177            GET_ARG;
3178            if (arg >= groups)
3179                FAIL;
3180            GET_SKIP_ADJ(1);
3181            code--; /* The skip is relative to the first arg! */
3182            /* There are two possibilities here: if there is both a 'then'
3183               part and an 'else' part, the generated code looks like:
3184
3185               GROUPREF_EXISTS
3186               <group>
3187               <skipyes>
3188               ...then part...
3189               JUMP
3190               <skipno>
3191               (<skipyes> jumps here)
3192               ...else part...
3193               (<skipno> jumps here)
3194
3195               If there is only a 'then' part, it looks like:
3196
3197               GROUPREF_EXISTS
3198               <group>
3199               <skip>
3200               ...then part...
3201               (<skip> jumps here)
3202
3203               There is no direct way to decide which it is, and we don't want
3204               to allow arbitrary jumps anywhere in the code; so we just look
3205               for a JUMP opcode preceding our skip target.
3206            */
3207            if (skip >= 3 && skip-3 < end-code &&
3208                code[skip-3] == SRE_OP_JUMP)
3209            {
3210                VTRACE(("both then and else parts present\n"));
3211                if (!_validate_inner(code+1, code+skip-3, groups))
3212                    FAIL;
3213                code += skip-2; /* Position after JUMP, at <skipno> */
3214                GET_SKIP;
3215                if (!_validate_inner(code, code+skip-1, groups))
3216                    FAIL;
3217                code += skip-1;
3218            }
3219            else {
3220                VTRACE(("only a then part present\n"));
3221                if (!_validate_inner(code+1, code+skip-1, groups))
3222                    FAIL;
3223                code += skip-1;
3224            }
3225            break;
3226
3227        case SRE_OP_ASSERT:
3228        case SRE_OP_ASSERT_NOT:
3229            GET_SKIP;
3230            GET_ARG; /* 0 for lookahead, width for lookbehind */
3231            code--; /* Back up over arg to simplify math below */
3232            if (arg & 0x80000000)
3233                FAIL; /* Width too large */
3234            /* Stop 1 before the end; we check the SUCCESS below */
3235            if (!_validate_inner(code+1, code+skip-2, groups))
3236                FAIL;
3237            code += skip-2;
3238            GET_OP;
3239            if (op != SRE_OP_SUCCESS)
3240                FAIL;
3241            break;
3242
3243        default:
3244            FAIL;
3245
3246        }
3247    }
3248
3249    VTRACE(("okay\n"));
3250    return 1;
3251}
3252
3253static int
3254_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
3255{
3256    if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
3257        FAIL;
3258    if (groups == 0)  /* fix for simplejson */
3259        groups = 100; /* 100 groups should always be safe */
3260    return _validate_inner(code, end-1, groups);
3261}
3262
3263static int
3264_validate(PatternObject *self)
3265{
3266    if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
3267    {
3268        PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
3269        return 0;
3270    }
3271    else
3272        VTRACE(("Success!\n"));
3273    return 1;
3274}
3275
3276/* -------------------------------------------------------------------- */
3277/* match methods */
3278
3279static void
3280match_dealloc(MatchObject* self)
3281{
3282    Py_XDECREF(self->regs);
3283    Py_XDECREF(self->string);
3284    Py_DECREF(self->pattern);
3285    PyObject_DEL(self);
3286}
3287
3288static PyObject*
3289match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def)
3290{
3291    if (index < 0 || index >= self->groups) {
3292        /* raise IndexError if we were given a bad group number */
3293        PyErr_SetString(
3294            PyExc_IndexError,
3295            "no such group"
3296            );
3297        return NULL;
3298    }
3299
3300    index *= 2;
3301
3302    if (self->string == Py_None || self->mark[index] < 0) {
3303        /* return default value if the string or group is undefined */
3304        Py_INCREF(def);
3305        return def;
3306    }
3307
3308    return PySequence_GetSlice(
3309        self->string, self->mark[index], self->mark[index+1]
3310        );
3311}
3312
3313static Py_ssize_t
3314match_getindex(MatchObject* self, PyObject* index)
3315{
3316    Py_ssize_t i;
3317
3318    if (PyInt_Check(index) || PyLong_Check(index))
3319        return PyInt_AsSsize_t(index);
3320
3321    i = -1;
3322
3323    if (self->pattern->groupindex) {
3324        index = PyObject_GetItem(self->pattern->groupindex, index);
3325        if (index) {
3326            if (PyInt_Check(index) || PyLong_Check(index))
3327                i = PyInt_AsSsize_t(index);
3328            Py_DECREF(index);
3329        } else
3330            PyErr_Clear();
3331    }
3332
3333    return i;
3334}
3335
3336static PyObject*
3337match_getslice(MatchObject* self, PyObject* index, PyObject* def)
3338{
3339    return match_getslice_by_index(self, match_getindex(self, index), def);
3340}
3341
3342static PyObject*
3343match_expand(MatchObject* self, PyObject* ptemplate)
3344{
3345    /* delegate to Python code */
3346    return call(
3347        SRE_PY_MODULE, "_expand",
3348        PyTuple_Pack(3, self->pattern, self, ptemplate)
3349        );
3350}
3351
3352static PyObject*
3353match_group(MatchObject* self, PyObject* args)
3354{
3355    PyObject* result;
3356    Py_ssize_t i, size;
3357
3358    size = PyTuple_GET_SIZE(args);
3359
3360    switch (size) {
3361    case 0:
3362        result = match_getslice(self, Py_False, Py_None);
3363        break;
3364    case 1:
3365        result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None);
3366        break;
3367    default:
3368        /* fetch multiple items */
3369        result = PyTuple_New(size);
3370        if (!result)
3371            return NULL;
3372        for (i = 0; i < size; i++) {
3373            PyObject* item = match_getslice(
3374                self, PyTuple_GET_ITEM(args, i), Py_None
3375                );
3376            if (!item) {
3377                Py_DECREF(result);
3378                return NULL;
3379            }
3380            PyTuple_SET_ITEM(result, i, item);
3381        }
3382        break;
3383    }
3384    return result;
3385}
3386
3387static PyObject*
3388match_groups(MatchObject* self, PyObject* args, PyObject* kw)
3389{
3390    PyObject* result;
3391    Py_ssize_t index;
3392
3393    PyObject* def = Py_None;
3394    static char* kwlist[] = { "default", NULL };
3395    if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groups", kwlist, &def))
3396        return NULL;
3397
3398    result = PyTuple_New(self->groups-1);
3399    if (!result)
3400        return NULL;
3401
3402    for (index = 1; index < self->groups; index++) {
3403        PyObject* item;
3404        item = match_getslice_by_index(self, index, def);
3405        if (!item) {
3406            Py_DECREF(result);
3407            return NULL;
3408        }
3409        PyTuple_SET_ITEM(result, index-1, item);
3410    }
3411
3412    return result;
3413}
3414
3415static PyObject*
3416match_groupdict(MatchObject* self, PyObject* args, PyObject* kw)
3417{
3418    PyObject* result;
3419    PyObject* keys;
3420    Py_ssize_t index;
3421
3422    PyObject* def = Py_None;
3423    static char* kwlist[] = { "default", NULL };
3424    if (!PyArg_ParseTupleAndKeywords(args, kw, "|O:groupdict", kwlist, &def))
3425        return NULL;
3426
3427    result = PyDict_New();
3428    if (!result || !self->pattern->groupindex)
3429        return result;
3430
3431    keys = PyMapping_Keys(self->pattern->groupindex);
3432    if (!keys)
3433        goto failed;
3434
3435    for (index = 0; index < PyList_GET_SIZE(keys); index++) {
3436        int status;
3437        PyObject* key;
3438        PyObject* value;
3439        key = PyList_GET_ITEM(keys, index);
3440        if (!key)
3441            goto failed;
3442        value = match_getslice(self, key, def);
3443        if (!value) {
3444            Py_DECREF(key);
3445            goto failed;
3446        }
3447        status = PyDict_SetItem(result, key, value);
3448        Py_DECREF(value);
3449        if (status < 0)
3450            goto failed;
3451    }
3452
3453    Py_DECREF(keys);
3454
3455    return result;
3456
3457failed:
3458    Py_XDECREF(keys);
3459    Py_DECREF(result);
3460    return NULL;
3461}
3462
3463static PyObject*
3464match_start(MatchObject* self, PyObject* args)
3465{
3466    Py_ssize_t index;
3467
3468    PyObject* index_ = Py_False; /* zero */
3469    if (!PyArg_UnpackTuple(args, "start", 0, 1, &index_))
3470        return NULL;
3471
3472    index = match_getindex(self, index_);
3473
3474    if (index < 0 || index >= self->groups) {
3475        PyErr_SetString(
3476            PyExc_IndexError,
3477            "no such group"
3478            );
3479        return NULL;
3480    }
3481
3482    /* mark is -1 if group is undefined */
3483    return PyInt_FromSsize_t(self->mark[index*2]);
3484}
3485
3486static PyObject*
3487match_end(MatchObject* self, PyObject* args)
3488{
3489    Py_ssize_t index;
3490
3491    PyObject* index_ = Py_False; /* zero */
3492    if (!PyArg_UnpackTuple(args, "end", 0, 1, &index_))
3493        return NULL;
3494
3495    index = match_getindex(self, index_);
3496
3497    if (index < 0 || index >= self->groups) {
3498        PyErr_SetString(
3499            PyExc_IndexError,
3500            "no such group"
3501            );
3502        return NULL;
3503    }
3504
3505    /* mark is -1 if group is undefined */
3506    return PyInt_FromSsize_t(self->mark[index*2+1]);
3507}
3508
3509LOCAL(PyObject*)
3510_pair(Py_ssize_t i1, Py_ssize_t i2)
3511{
3512    PyObject* pair;
3513    PyObject* item;
3514
3515    pair = PyTuple_New(2);
3516    if (!pair)
3517        return NULL;
3518
3519    item = PyInt_FromSsize_t(i1);
3520    if (!item)
3521        goto error;
3522    PyTuple_SET_ITEM(pair, 0, item);
3523
3524    item = PyInt_FromSsize_t(i2);
3525    if (!item)
3526        goto error;
3527    PyTuple_SET_ITEM(pair, 1, item);
3528
3529    return pair;
3530
3531  error:
3532    Py_DECREF(pair);
3533    return NULL;
3534}
3535
3536static PyObject*
3537match_span(MatchObject* self, PyObject* args)
3538{
3539    Py_ssize_t index;
3540
3541    PyObject* index_ = Py_False; /* zero */
3542    if (!PyArg_UnpackTuple(args, "span", 0, 1, &index_))
3543        return NULL;
3544
3545    index = match_getindex(self, index_);
3546
3547    if (index < 0 || index >= self->groups) {
3548        PyErr_SetString(
3549            PyExc_IndexError,
3550            "no such group"
3551            );
3552        return NULL;
3553    }
3554
3555    /* marks are -1 if group is undefined */
3556    return _pair(self->mark[index*2], self->mark[index*2+1]);
3557}
3558
3559static PyObject*
3560match_regs(MatchObject* self)
3561{
3562    PyObject* regs;
3563    PyObject* item;
3564    Py_ssize_t index;
3565
3566    regs = PyTuple_New(self->groups);
3567    if (!regs)
3568        return NULL;
3569
3570    for (index = 0; index < self->groups; index++) {
3571        item = _pair(self->mark[index*2], self->mark[index*2+1]);
3572        if (!item) {
3573            Py_DECREF(regs);
3574            return NULL;
3575        }
3576        PyTuple_SET_ITEM(regs, index, item);
3577    }
3578
3579    Py_INCREF(regs);
3580    self->regs = regs;
3581
3582    return regs;
3583}
3584
3585static PyObject*
3586match_copy(MatchObject* self, PyObject *unused)
3587{
3588#ifdef USE_BUILTIN_COPY
3589    MatchObject* copy;
3590    Py_ssize_t slots, offset;
3591
3592    slots = 2 * (self->pattern->groups+1);
3593
3594    copy = PyObject_NEW_VAR(MatchObject, &Match_Type, slots);
3595    if (!copy)
3596        return NULL;
3597
3598    /* this value a constant, but any compiler should be able to
3599       figure that out all by itself */
3600    offset = offsetof(MatchObject, string);
3601
3602    Py_XINCREF(self->pattern);
3603    Py_XINCREF(self->string);
3604    Py_XINCREF(self->regs);
3605
3606    memcpy((char*) copy + offset, (char*) self + offset,
3607           sizeof(MatchObject) + slots * sizeof(Py_ssize_t) - offset);
3608
3609    return (PyObject*) copy;
3610#else
3611    PyErr_SetString(PyExc_TypeError, "cannot copy this match object");
3612    return NULL;
3613#endif
3614}
3615
3616static PyObject*
3617match_deepcopy(MatchObject* self, PyObject* memo)
3618{
3619#ifdef USE_BUILTIN_COPY
3620    MatchObject* copy;
3621
3622    copy = (MatchObject*) match_copy(self);
3623    if (!copy)
3624        return NULL;
3625
3626    if (!deepcopy((PyObject**) &copy->pattern, memo) ||
3627        !deepcopy(&copy->string, memo) ||
3628        !deepcopy(&copy->regs, memo)) {
3629        Py_DECREF(copy);
3630        return NULL;
3631    }
3632
3633#else
3634    PyErr_SetString(PyExc_TypeError, "cannot deepcopy this match object");
3635    return NULL;
3636#endif
3637}
3638
3639PyDoc_STRVAR(match_doc,
3640"The result of re.match() and re.search().\n\
3641Match objects always have a boolean value of True.");
3642
3643PyDoc_STRVAR(match_group_doc,
3644"group([group1, ...]) -> str or tuple.\n\
3645    Return subgroup(s) of the match by indices or names.\n\
3646    For 0 returns the entire match.");
3647
3648PyDoc_STRVAR(match_start_doc,
3649"start([group=0]) -> int.\n\
3650    Return index of the start of the substring matched by group.");
3651
3652PyDoc_STRVAR(match_end_doc,
3653"end([group=0]) -> int.\n\
3654    Return index of the end of the substring matched by group.");
3655
3656PyDoc_STRVAR(match_span_doc,
3657"span([group]) -> tuple.\n\
3658    For MatchObject m, return the 2-tuple (m.start(group), m.end(group)).");
3659
3660PyDoc_STRVAR(match_groups_doc,
3661"groups([default=None]) -> tuple.\n\
3662    Return a tuple containing all the subgroups of the match, from 1.\n\
3663    The default argument is used for groups\n\
3664    that did not participate in the match");
3665
3666PyDoc_STRVAR(match_groupdict_doc,
3667"groupdict([default=None]) -> dict.\n\
3668    Return a dictionary containing all the named subgroups of the match,\n\
3669    keyed by the subgroup name. The default argument is used for groups\n\
3670    that did not participate in the match");
3671
3672PyDoc_STRVAR(match_expand_doc,
3673"expand(template) -> str.\n\
3674    Return the string obtained by doing backslash substitution\n\
3675    on the string template, as done by the sub() method.");
3676
3677static PyMethodDef match_methods[] = {
3678    {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc},
3679    {"start", (PyCFunction) match_start, METH_VARARGS, match_start_doc},
3680    {"end", (PyCFunction) match_end, METH_VARARGS, match_end_doc},
3681    {"span", (PyCFunction) match_span, METH_VARARGS, match_span_doc},
3682    {"groups", (PyCFunction) match_groups, METH_VARARGS|METH_KEYWORDS,
3683        match_groups_doc},
3684    {"groupdict", (PyCFunction) match_groupdict, METH_VARARGS|METH_KEYWORDS,
3685        match_groupdict_doc},
3686    {"expand", (PyCFunction) match_expand, METH_O, match_expand_doc},
3687    {"__copy__", (PyCFunction) match_copy, METH_NOARGS},
3688    {"__deepcopy__", (PyCFunction) match_deepcopy, METH_O},
3689    {NULL, NULL}
3690};
3691
3692static PyObject *
3693match_lastindex_get(MatchObject *self)
3694{
3695    if (self->lastindex >= 0)
3696        return PyInt_FromSsize_t(self->lastindex);
3697    Py_INCREF(Py_None);
3698    return Py_None;
3699}
3700
3701static PyObject *
3702match_lastgroup_get(MatchObject *self)
3703{
3704    if (self->pattern->indexgroup && self->lastindex >= 0) {
3705        PyObject* result = PySequence_GetItem(
3706            self->pattern->indexgroup, self->lastindex
3707            );
3708        if (result)
3709            return result;
3710        PyErr_Clear();
3711    }
3712    Py_INCREF(Py_None);
3713    return Py_None;
3714}
3715
3716static PyObject *
3717match_regs_get(MatchObject *self)
3718{
3719    if (self->regs) {
3720        Py_INCREF(self->regs);
3721        return self->regs;
3722    } else
3723        return match_regs(self);
3724}
3725
3726static PyGetSetDef match_getset[] = {
3727    {"lastindex", (getter)match_lastindex_get, (setter)NULL},
3728    {"lastgroup", (getter)match_lastgroup_get, (setter)NULL},
3729    {"regs",      (getter)match_regs_get,      (setter)NULL},
3730    {NULL}
3731};
3732
3733#define MATCH_OFF(x) offsetof(MatchObject, x)
3734static PyMemberDef match_members[] = {
3735    {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY},
3736    {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY},
3737    {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY},
3738    {"endpos",  T_PYSSIZET, MATCH_OFF(endpos),  READONLY},
3739    {NULL}
3740};
3741
3742
3743/* FIXME: implement setattr("string", None) as a special case (to
3744   detach the associated string, if any */
3745
3746static PyTypeObject Match_Type = {
3747    PyVarObject_HEAD_INIT(NULL, 0)
3748    "_" SRE_MODULE ".SRE_Match",
3749    sizeof(MatchObject), sizeof(Py_ssize_t),
3750    (destructor)match_dealloc,  /* tp_dealloc */
3751    0,                          /* tp_print */
3752    0,                          /* tp_getattr */
3753    0,                          /* tp_setattr */
3754    0,                          /* tp_compare */
3755    0,                          /* tp_repr */
3756    0,                          /* tp_as_number */
3757    0,                          /* tp_as_sequence */
3758    0,                          /* tp_as_mapping */
3759    0,                          /* tp_hash */
3760    0,                          /* tp_call */
3761    0,                          /* tp_str */
3762    0,                          /* tp_getattro */
3763    0,                          /* tp_setattro */
3764    0,                          /* tp_as_buffer */
3765    Py_TPFLAGS_DEFAULT,
3766    match_doc,                  /* tp_doc */
3767    0,                          /* tp_traverse */
3768    0,                          /* tp_clear */
3769    0,                          /* tp_richcompare */
3770    0,                          /* tp_weaklistoffset */
3771    0,                          /* tp_iter */
3772    0,                          /* tp_iternext */
3773    match_methods,    /* tp_methods */
3774    match_members,    /* tp_members */
3775    match_getset,           /* tp_getset */
3776};
3777
3778static PyObject*
3779pattern_new_match(PatternObject* pattern, SRE_STATE* state, int status)
3780{
3781    /* create match object (from state object) */
3782
3783    MatchObject* match;
3784    Py_ssize_t i, j;
3785    char* base;
3786    int n;
3787
3788    if (status > 0) {
3789
3790        /* create match object (with room for extra group marks) */
3791        /* coverity[ampersand_in_size] */
3792        match = PyObject_NEW_VAR(MatchObject, &Match_Type,
3793                                 2*(pattern->groups+1));
3794        if (!match)
3795            return NULL;
3796
3797        Py_INCREF(pattern);
3798        match->pattern = pattern;
3799
3800        Py_INCREF(state->string);
3801        match->string = state->string;
3802
3803        match->regs = NULL;
3804        match->groups = pattern->groups+1;
3805
3806        /* fill in group slices */
3807
3808        base = (char*) state->beginning;
3809        n = state->charsize;
3810
3811        match->mark[0] = ((char*) state->start - base) / n;
3812        match->mark[1] = ((char*) state->ptr - base) / n;
3813
3814        for (i = j = 0; i < pattern->groups; i++, j+=2)
3815            if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) {
3816                match->mark[j+2] = ((char*) state->mark[j] - base) / n;
3817                match->mark[j+3] = ((char*) state->mark[j+1] - base) / n;
3818            } else
3819                match->mark[j+2] = match->mark[j+3] = -1; /* undefined */
3820
3821        match->pos = state->pos;
3822        match->endpos = state->endpos;
3823
3824        match->lastindex = state->lastindex;
3825
3826        return (PyObject*) match;
3827
3828    } else if (status == 0) {
3829
3830        /* no match */
3831        Py_INCREF(Py_None);
3832        return Py_None;
3833
3834    }
3835
3836    /* internal error */
3837    pattern_error(status);
3838    return NULL;
3839}
3840
3841
3842/* -------------------------------------------------------------------- */
3843/* scanner methods (experimental) */
3844
3845static void
3846scanner_dealloc(ScannerObject* self)
3847{
3848    state_fini(&self->state);
3849    Py_XDECREF(self->pattern);
3850    PyObject_DEL(self);
3851}
3852
3853static PyObject*
3854scanner_match(ScannerObject* self, PyObject *unused)
3855{
3856    SRE_STATE* state = &self->state;
3857    PyObject* match;
3858    int status;
3859
3860    state_reset(state);
3861
3862    state->ptr = state->start;
3863
3864    if (state->charsize == 1) {
3865        status = sre_match(state, PatternObject_GetCode(self->pattern));
3866    } else {
3867#if defined(HAVE_UNICODE)
3868        status = sre_umatch(state, PatternObject_GetCode(self->pattern));
3869#endif
3870    }
3871    if (PyErr_Occurred())
3872        return NULL;
3873
3874    match = pattern_new_match((PatternObject*) self->pattern,
3875                               state, status);
3876
3877    if (status == 0 || state->ptr == state->start)
3878        state->start = (void*) ((char*) state->ptr + state->charsize);
3879    else
3880        state->start = state->ptr;
3881
3882    return match;
3883}
3884
3885
3886static PyObject*
3887scanner_search(ScannerObject* self, PyObject *unused)
3888{
3889    SRE_STATE* state = &self->state;
3890    PyObject* match;
3891    int status;
3892
3893    state_reset(state);
3894
3895    state->ptr = state->start;
3896
3897    if (state->charsize == 1) {
3898        status = sre_search(state, PatternObject_GetCode(self->pattern));
3899    } else {
3900#if defined(HAVE_UNICODE)
3901        status = sre_usearch(state, PatternObject_GetCode(self->pattern));
3902#endif
3903    }
3904    if (PyErr_Occurred())
3905        return NULL;
3906
3907    match = pattern_new_match((PatternObject*) self->pattern,
3908                               state, status);
3909
3910    if (status == 0 || state->ptr == state->start)
3911        state->start = (void*) ((char*) state->ptr + state->charsize);
3912    else
3913        state->start = state->ptr;
3914
3915    return match;
3916}
3917
3918static PyMethodDef scanner_methods[] = {
3919    {"match", (PyCFunction) scanner_match, METH_NOARGS},
3920    {"search", (PyCFunction) scanner_search, METH_NOARGS},
3921    {NULL, NULL}
3922};
3923
3924#define SCAN_OFF(x) offsetof(ScannerObject, x)
3925static PyMemberDef scanner_members[] = {
3926    {"pattern", T_OBJECT, SCAN_OFF(pattern),  READONLY},
3927    {NULL}  /* Sentinel */
3928};
3929
3930statichere PyTypeObject Scanner_Type = {
3931    PyObject_HEAD_INIT(NULL)
3932    0, "_" SRE_MODULE ".SRE_Scanner",
3933    sizeof(ScannerObject), 0,
3934    (destructor)scanner_dealloc, /*tp_dealloc*/
3935    0,        /* tp_print */
3936    0,        /* tp_getattr */
3937    0,        /* tp_setattr */
3938    0,        /* tp_reserved */
3939    0,        /* tp_repr */
3940    0,        /* tp_as_number */
3941    0,        /* tp_as_sequence */
3942    0,        /* tp_as_mapping */
3943    0,        /* tp_hash */
3944    0,        /* tp_call */
3945    0,        /* tp_str */
3946    0,        /* tp_getattro */
3947    0,        /* tp_setattro */
3948    0,        /* tp_as_buffer */
3949    Py_TPFLAGS_DEFAULT,   /* tp_flags */
3950    0,        /* tp_doc */
3951    0,        /* tp_traverse */
3952    0,        /* tp_clear */
3953    0,        /* tp_richcompare */
3954    0,        /* tp_weaklistoffset */
3955    0,        /* tp_iter */
3956    0,        /* tp_iternext */
3957    scanner_methods,    /* tp_methods */
3958    scanner_members,    /* tp_members */
3959    0,        /* tp_getset */
3960};
3961
3962static PyObject*
3963pattern_scanner(PatternObject* pattern, PyObject* args)
3964{
3965    /* create search state object */
3966
3967    ScannerObject* self;
3968
3969    PyObject* string;
3970    Py_ssize_t start = 0;
3971    Py_ssize_t end = PY_SSIZE_T_MAX;
3972    if (!PyArg_ParseTuple(args, "O|nn:scanner", &string, &start, &end))
3973        return NULL;
3974
3975    /* create scanner object */
3976    self = PyObject_NEW(ScannerObject, &Scanner_Type);
3977    if (!self)
3978        return NULL;
3979    self->pattern = NULL;
3980
3981    string = state_init(&self->state, pattern, string, start, end);
3982    if (!string) {
3983        Py_DECREF(self);
3984        return NULL;
3985    }
3986
3987    Py_INCREF(pattern);
3988    self->pattern = (PyObject*) pattern;
3989
3990    return (PyObject*) self;
3991}
3992
3993static PyMethodDef _functions[] = {
3994    {"compile", _compile, METH_VARARGS},
3995    {"getcodesize", sre_codesize, METH_NOARGS},
3996    {"getlower", sre_getlower, METH_VARARGS},
3997    {NULL, NULL}
3998};
3999
4000#if PY_VERSION_HEX < 0x02030000
4001DL_EXPORT(void) init_sre(void)
4002#else
4003PyMODINIT_FUNC init_sre(void)
4004#endif
4005{
4006    PyObject* m;
4007    PyObject* d;
4008    PyObject* x;
4009
4010    /* Patch object types */
4011    if (PyType_Ready(&Pattern_Type) || PyType_Ready(&Match_Type) ||
4012        PyType_Ready(&Scanner_Type))
4013        return;
4014
4015    m = Py_InitModule("_" SRE_MODULE, _functions);
4016    if (m == NULL)
4017      return;
4018    d = PyModule_GetDict(m);
4019
4020    x = PyInt_FromLong(SRE_MAGIC);
4021    if (x) {
4022        PyDict_SetItemString(d, "MAGIC", x);
4023        Py_DECREF(x);
4024    }
4025
4026    x = PyInt_FromLong(sizeof(SRE_CODE));
4027    if (x) {
4028        PyDict_SetItemString(d, "CODESIZE", x);
4029        Py_DECREF(x);
4030    }
4031
4032    x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
4033    if (x) {
4034        PyDict_SetItemString(d, "MAXREPEAT", x);
4035        Py_DECREF(x);
4036    }
4037
4038    x = PyString_FromString(copyright);
4039    if (x) {
4040        PyDict_SetItemString(d, "copyright", x);
4041        Py_DECREF(x);
4042    }
4043}
4044
4045#endif /* !defined(SRE_RECURSIVE) */
4046
4047/* vim:ts=4:sw=4:et
4048*/
4049