/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */

#ifndef SRE_INCLUDED
#define SRE_INCLUDED

#include "sre_constants.h"

/* SRE_CODE: type of a single code word (must be unsigned short or larger,
   and large enough to hold a UCS4 character).  It is Py_UCS4 when
   Py_USING_UNICODE is defined and unsigned int otherwise.

   SRE_MAXREPEAT: derived from SRE_CODE.  When size_t is wider than the
   code word (compared against 4 in the Unicode build, against SIZEOF_INT
   otherwise) it is the all-bits-set SRE_CODE value; otherwise it is
   PY_SSIZE_T_MAX converted to SRE_CODE.
   NOTE(review): presumably the largest repeat count the engine accepts,
   and the literal 4 presumably stands for sizeof(Py_UCS4) -- confirm. */
#ifdef Py_USING_UNICODE
# define SRE_CODE Py_UCS4
# if SIZEOF_SIZE_T > 4
#  define SRE_MAXREPEAT (~(SRE_CODE)0)
# else
#  define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
# endif
#else
# define SRE_CODE unsigned int
# if SIZEOF_SIZE_T > SIZEOF_INT
#  define SRE_MAXREPEAT (~(SRE_CODE)0)
# else
#  define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
# endif
#endif

347eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanieltypedef struct {
357eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    PyObject_VAR_HEAD
367eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    Py_ssize_t groups; /* must be first! */
377eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    PyObject* groupindex;
387eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    PyObject* indexgroup;
397eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* compatibility */
407eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    PyObject* pattern; /* pattern source (or None) */
417eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    int flags; /* flags used when compiling pattern source */
427eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    PyObject *weakreflist; /* List of weak references */
437eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* pattern code */
447eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    Py_ssize_t codesize;
457eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    SRE_CODE code[1];
467eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel} PatternObject;
477eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Cast o to PatternObject* and yield its inline code array.  The argument
   is parenthesized, so any pointer-valued expression may be passed; no
   type check is performed on o. */
#define PatternObject_GetCode(o) (((PatternObject*)(o))->code)

507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanieltypedef struct {
517eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    PyObject_VAR_HEAD
527eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    PyObject* string; /* link to the target string (must be first) */
537eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    PyObject* regs; /* cached list of matching spans */
547eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    PatternObject* pattern; /* link to the regex (pattern) object */
557eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    Py_ssize_t pos, endpos; /* current target slice */
567eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */
577eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    Py_ssize_t groups; /* number of groups (start/end marks) */
587eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    Py_ssize_t mark[1];
597eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel} MatchObject;
607eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
/* Pointer to a function that takes a character code as unsigned int and
   returns an unsigned int.  NOTE(review): going by the name, the result is
   presumably the lowercase form of ch -- confirm against the hooks that
   are installed. */
typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch);

/* Fixed number of mark slots.  Kept as a preprocessor constant so it stays
   usable both as an array bound and in #if expressions. */
/* FIXME: <fl> shouldn't be a constant, really... */
#define SRE_MARK_SIZE 200

667eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanieltypedef struct SRE_REPEAT_T {
677eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    Py_ssize_t count;
687eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    SRE_CODE* pattern; /* points to REPEAT operator arguments */
697eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    void* last_ptr; /* helper to check for infinite loops */
707eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    struct SRE_REPEAT_T *prev; /* points to previous repeat context */
717eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel} SRE_REPEAT;
727eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
737eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanieltypedef struct {
747eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* string pointers */
757eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    void* ptr; /* current position (also end of current slice) */
767eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    void* beginning; /* start of original string */
777eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    void* start; /* start of current slice */
787eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    void* end; /* end of original string */
797eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* attributes for the match object */
807eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    PyObject* string;
817eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    Py_ssize_t pos, endpos;
827eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* character size */
837eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    int charsize;
847eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* registers */
857eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    Py_ssize_t lastindex;
867eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    Py_ssize_t lastmark;
877eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    void* mark[SRE_MARK_SIZE];
887eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* dynamically allocated stuff */
897eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    char* data_stack;
907eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    size_t data_stack_size;
917eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    size_t data_stack_base;
927eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* current repeat context */
937eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    SRE_REPEAT *repeat;
947eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    /* hooks */
957eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    SRE_TOLOWER_HOOK lower;
967eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel} SRE_STATE;
977eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
987eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanieltypedef struct {
997eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    PyObject_HEAD
1007eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    PyObject* pattern;
1017eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel    SRE_STATE state;
1027eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel} ScannerObject;
1037eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel
#endif