17eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel/* 27eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel * Secret Labs' Regular Expression Engine 37eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel * 47eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel * regular expression matching engine 57eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel * 67eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. 77eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel * 87eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel * See the _sre.c file for information on usage and redistribution. 97eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel */ 107eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel 117eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#ifndef SRE_INCLUDED 127eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#define SRE_INCLUDED 137eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel 147eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#include "sre_constants.h" 157eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel 167eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel/* size of a code word (must be unsigned short or larger, and 177eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel large enough to hold a UCS4 character) */ 187eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#ifdef Py_USING_UNICODE 197eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel# define SRE_CODE Py_UCS4 207eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel# if SIZEOF_SIZE_T > 4 217eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel# define SRE_MAXREPEAT (~(SRE_CODE)0) 227eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel# else 237eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX) 247eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel# endif 257eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#else 267eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel# define SRE_CODE unsigned int 277eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel# if SIZEOF_SIZE_T > SIZEOF_INT 287eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel# define SRE_MAXREPEAT (~(SRE_CODE)0) 297eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel# else 307eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX) 317eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel# endif 327eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#endif 337eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel 347eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanieltypedef struct { 357eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel PyObject_VAR_HEAD 367eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel Py_ssize_t groups; /* must be first! */ 377eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel PyObject* groupindex; 387eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel PyObject* indexgroup; 397eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel /* compatibility */ 407eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel PyObject* pattern; /* pattern source (or None) */ 417eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel int flags; /* flags used when compiling pattern source */ 427eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel PyObject *weakreflist; /* List of weak references */ 437eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel /* pattern code */ 447eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel Py_ssize_t codesize; 457eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel SRE_CODE code[1]; 467eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel} PatternObject; 477eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel 487eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#define PatternObject_GetCode(o) (((PatternObject*)(o))->code) 497eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel 507eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanieltypedef struct { 517eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel PyObject_VAR_HEAD 527eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel PyObject* string; /* link to the target string (must be first) */ 537eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel PyObject* regs; /* cached list of matching spans */ 547eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel PatternObject* pattern; /* link to the regex (pattern) object */ 557eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel Py_ssize_t pos, endpos; /* current target slice */ 567eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */ 577eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel Py_ssize_t groups; /* number of groups (start/end marks) */ 587eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel Py_ssize_t mark[1]; 597eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel} MatchObject; 607eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel 617eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanieltypedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch); 627eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel 637eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel/* FIXME: <fl> shouldn't be a constant, really... */ 647eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#define SRE_MARK_SIZE 200 657eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel 667eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanieltypedef struct SRE_REPEAT_T { 677eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel Py_ssize_t count; 687eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel SRE_CODE* pattern; /* points to REPEAT operator arguments */ 697eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel void* last_ptr; /* helper to check for infinite loops */ 707eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel struct SRE_REPEAT_T *prev; /* points to previous repeat context */ 717eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel} SRE_REPEAT; 727eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel 737eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanieltypedef struct { 747eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel /* string pointers */ 757eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel void* ptr; /* current position (also end of current slice) */ 767eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel void* beginning; /* start of original string */ 777eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel void* start; /* start of current slice */ 787eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel void* end; /* end of original string */ 797eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel /* attributes for the match object */ 807eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel PyObject* string; 817eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel Py_ssize_t pos, endpos; 827eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel /* character size */ 837eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel int charsize; 847eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel /* registers */ 857eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel Py_ssize_t lastindex; 867eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel Py_ssize_t lastmark; 877eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel void* mark[SRE_MARK_SIZE]; 887eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel /* dynamically allocated stuff */ 897eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel char* data_stack; 907eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel size_t data_stack_size; 917eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel size_t data_stack_base; 927eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel /* current repeat context */ 937eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel SRE_REPEAT *repeat; 947eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel /* hooks */ 957eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel SRE_TOLOWER_HOOK lower; 967eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel} SRE_STATE; 977eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel 987eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDanieltypedef struct { 997eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel PyObject_HEAD 1007eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel PyObject* pattern; 1017eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel SRE_STATE state; 1027eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel} ScannerObject; 1037eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel 1047eb75bccb5dacb658c63db1a9a980950c3d54d42Daryl McDaniel#endif 105