/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
           Copyright (c) 1997-2014 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


/* This module contains the external function pcre_compile(), along with
supporting internal functions that are not used by other modules.
*/ 4365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 4465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 4565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef HAVE_CONFIG_H 4665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include "config.h" 4765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 4865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 4965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define NLBLOCK cd /* Block containing newline information */ 5065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define PSSTART start_pattern /* Field containing pattern start */ 5165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define PSEND end_pattern /* Field containing pattern end */ 5265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 5365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include "pcre_internal.h" 5465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 5565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 5665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* When PCRE_DEBUG is defined, we need the pcre(16|32)_printint() function, which 5765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichis also used by pcretest. PCRE_DEBUG is not defined when building a production 5865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlibrary. We do not need to select pcre16_printint.c specially, because the 5965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichCOMPILE_PCREx macro will already be appropriately set. 
*/ 6065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 6165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef PCRE_DEBUG 6265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* pcre_printint.c should not include any headers */ 6365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define PCRE_INCLUDED 6465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#include "pcre_printint.c" 6565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#undef PCRE_INCLUDED 6665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 6765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 6865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 6965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Macro for setting individual bits in class bitmaps. */ 7065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 7165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define SETBIT(a,b) a[(b)/8] |= (1 << ((b)&7)) 7265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 7365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Maximum length value to check against when making sure that the integer that 7465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichholds the compiled pattern length does not overflow. We make it a bit less than 7565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichINT_MAX to allow for adding in group terminating bytes, so that we don't have 7665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichto check them every time. 
*/ 7765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 7865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define OFLOW_MAX (INT_MAX - 20) 7965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 8065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Definitions to allow mutual recursion */ 8165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 8265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int 8365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich add_list_to_class(pcre_uint8 *, pcre_uchar **, int, compile_data *, 8465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uint32 *, unsigned int); 8565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 8665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL 8765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int, 8865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 *, pcre_int32 *, pcre_uint32 *, pcre_int32 *, branch_chain *, 8965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich compile_data *, int *); 9065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 9165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 9265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 9365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 9465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Code parameters and static tables * 9565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 9665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 9765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This value specifies the size of stack workspace that is used during the 9865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfirst pre-compile phase that determines how much memory is required. 
The regex
is partly compiled into this space, but the compiled parts are discarded as
soon as they can be, so that hopefully there will never be an overrun. The code
does, however, check for an overrun. The largest amount I've seen used is 218,
so this number is very generous.

The same workspace is used during the second, actual compile phase for
remembering forward references to groups so that they can be filled in at the
end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE
is 4 there is plenty of room for most patterns. However, the memory can get
filled up by repetitions of forward references, for example patterns like
/(?1){0,1999}(b)/, and one user did hit the limit. The code has been changed so
that the workspace is expanded using malloc() in this situation. The value
below is therefore a minimum, and we put a maximum on it for safety. The
minimum is now also defined in terms of LINK_SIZE so that the use of malloc()
kicks in at the same number of forward references in all cases.
*/ 11465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 11565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define COMPILE_WORK_SIZE (2048*LINK_SIZE) 11665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE) 11765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 11865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This value determines the size of the initial vector that is used for 11965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichremembering named groups during the pre-compile. It is allocated on the stack, 12065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbut if it is too small, it is expanded using malloc(), in a similar way to the 12165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichworkspace. The value is the number of slots in the list. */ 12265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 12365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define NAMED_GROUP_LIST_SIZE 20 12465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 12565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* The overrun tests check for a slightly smaller size so that they detect the 12665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichoverrun before it actually does run off the end of the data block. */ 12765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 12865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define WORK_SIZE_SAFETY_MARGIN (100) 12965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 13065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Private flags added to firstchar and reqchar. 
*/ 13165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 13265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define REQ_CASELESS (1 << 0) /* Indicates caselessness */ 13365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define REQ_VARY (1 << 1) /* Reqchar followed non-literal item */ 13465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Negative values for the firstchar and reqchar flags */ 13565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define REQ_UNSET (-2) 13665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define REQ_NONE (-1) 13765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 13865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Repeated character flags. */ 13965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 14065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define UTF_LENGTH 0x10000000l /* The char contains its length. */ 14165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 14265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Table for handling escaped characters in the range '0'-'z'. Positive returns 14365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichare simple data values; negative values are for special things like \d and so 14465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichon. Zero means further processing is needed (for things like \x), or the escape 14565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichis invalid. */ 14665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 14765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC 14865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 14965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This is the "normal" table for ASCII systems or for EBCDIC systems running 15065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichin UTF-8 mode. 
*/ 15165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 15265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const short int escapes[] = { 15365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 15465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 15565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 15665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 15765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 15865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_COLON, CHAR_SEMICOLON, 15965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, 16065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_GREATER_THAN_SIGN, CHAR_QUESTION_MARK, 16165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_COMMERCIAL_AT, -ESC_A, 16265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_B, -ESC_C, 16365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_D, -ESC_E, 16465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, -ESC_G, 16565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_H, 0, 16665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, -ESC_K, 16765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 16865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_N, 0, 16965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_P, -ESC_Q, 17065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_R, -ESC_S, 17165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 17265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_V, -ESC_W, 17365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_X, 0, 17465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_Z, CHAR_LEFT_SQUARE_BRACKET, 17565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET, 17665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE, 
17765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_GRAVE_ACCENT, 7, 17865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_b, 0, 17965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_d, ESC_e, 18065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ESC_f, 0, 18165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_h, 0, 18265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, -ESC_k, 18365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 18465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ESC_n, 0, 18565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_p, 0, 18665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ESC_r, -ESC_s, 18765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ESC_tee, 0, 18865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_v, -ESC_w, 18965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 19065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -ESC_z 19165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}; 19265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 19365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else 19465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 19565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This is the "abnormal" table for EBCDIC systems without UTF-8 support. 
*/ 19665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 19765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const short int escapes[] = { 19865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|', 19965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0, 20065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* 58 */ 0, 0, '!', '$', '*', ')', ';', '~', 20165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* 60 */ '-', '/', 0, 0, 0, 0, 0, 0, 20265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* 68 */ 0, 0, '|', ',', '%', '_', '>', '?', 20365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 20465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"', 20565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* 80 */ 0, 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, 20665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0, 20765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* 90 */ 0, 0, -ESC_k, 'l', 0, ESC_n, 0, -ESC_p, 20865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0, 20965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0, 21065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0, 21165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 21265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-', 21365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G, 21465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0, 21565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* D0 */ '}', 0, -ESC_K, 0, 0,-ESC_N, 0, -ESC_P, 
21665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0, 21765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X, 21865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0, 21965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, 22065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0 22165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}; 22265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 22365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 22465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 22565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Table of special "verbs" like (*PRUNE). This is a short table, so it is 22665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsearched linearly. Put all the names into a single string, in order to reduce 22765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe number of relocations when a shared library is dynamically linked. The 22865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstring is built from string macros so that it works in UTF-8 mode on EBCDIC 22965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichplatforms. 
*/ 23065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 23165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichtypedef struct verbitem { 23265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int len; /* Length of verb name */ 23365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int op; /* Op when no arg, or -1 if arg mandatory */ 23465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int op_arg; /* Op when arg present, or -1 if not allowed */ 23565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} verbitem; 23665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 23765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const char verbnames[] = 23865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "\0" /* Empty name is a shorthand for MARK */ 23965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRING_MARK0 24065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRING_ACCEPT0 24165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRING_COMMIT0 24265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRING_F0 24365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRING_FAIL0 24465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRING_PRUNE0 24565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRING_SKIP0 24665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRING_THEN; 24765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 24865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const verbitem verbs[] = { 24965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, -1, OP_MARK }, 25065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 4, -1, OP_MARK }, 25165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 6, OP_ACCEPT, -1 }, 25265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 6, OP_COMMIT, -1 }, 25365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 1, OP_FAIL, -1 }, 25465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 4, OP_FAIL, -1 }, 
25565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 5, OP_PRUNE, OP_PRUNE_ARG }, 25665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 4, OP_SKIP, OP_SKIP_ARG }, 25765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 4, OP_THEN, OP_THEN_ARG } 25865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}; 25965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 26065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const int verbcount = sizeof(verbs)/sizeof(verbitem); 26165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 26265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 26365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Substitutes for [[:<:]] and [[:>:]], which mean start and end of word in 26465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichanother regex library. */ 26565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 26665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar sub_start_of_word[] = { 26765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK, 26865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w, CHAR_RIGHT_PARENTHESIS, '\0' }; 26965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 27065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar sub_end_of_word[] = { 27165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK, 27265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w, 27365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_RIGHT_PARENTHESIS, '\0' }; 27465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 27565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 27665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Tables of names of POSIX character classes and their lengths. 
The names are 27765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnow all in a single string, to reduce the number of relocations when a shared 27865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlibrary is dynamically loaded. The list of lengths is terminated by a zero 27965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlength entry. The first three must be alpha, lower, upper, as this is assumed 28065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor handling case independence. The indices for graph, print, and punct are 28165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichneeded, so identify them. */ 28265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 28365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const char posix_names[] = 28465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0 28565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0 28665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRING_graph0 STRING_print0 STRING_punct0 STRING_space0 28765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRING_word0 STRING_xdigit; 28865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 28965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 posix_name_lengths[] = { 29065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; 29165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 29265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define PC_GRAPH 8 29365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define PC_PRINT 9 29465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define PC_PUNCT 10 29565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 29665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 29765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Table of class bit maps for each POSIX class. 
Each class is formed from a
base map, with an optional addition or removal of another map. Then, for some
classes, there is some additional tweaking: for [:blank:] the vertical space
characters are removed, and for [:alpha:] and [:alnum:] the underscore
character is removed. The triples in the table consist of the base map offset,
second map offset or -1 if no second map, and a non-negative value for map
addition or a negative value for map subtraction (if there are two maps). The
absolute value of the third field has these meanings: 0 => no tweaking, 1 =>
remove vertical space characters, 2 => remove underscore.
*/ 30665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 30765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const int posix_class_maps[] = { 30865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cbit_word, cbit_digit, -2, /* alpha */ 30965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cbit_lower, -1, 0, /* lower */ 31065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cbit_upper, -1, 0, /* upper */ 31165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cbit_word, -1, 2, /* alnum - word without underscore */ 31265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cbit_print, cbit_cntrl, 0, /* ascii */ 31365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cbit_space, -1, 1, /* blank - a GNU extension */ 31465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cbit_cntrl, -1, 0, /* cntrl */ 31565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cbit_digit, -1, 0, /* digit */ 31665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cbit_graph, -1, 0, /* graph */ 31765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cbit_print, -1, 0, /* print */ 31865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cbit_punct, -1, 0, /* punct */ 31965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cbit_space, -1, 0, /* space */ 32065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cbit_word, -1, 0, /* word - a Perl extension */ 32165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cbit_xdigit,-1, 0 /* xdigit */ 32265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}; 32365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 32465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Table of substitutes for \d etc when PCRE_UCP is set. They are replaced by 32565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichUnicode property escapes. 
*/ 32665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 32765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 32865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PNd[] = { 32965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 33065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 33165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pNd[] = { 33265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 33365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 33465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PXsp[] = { 33565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 33665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 33765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pXsp[] = { 33865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 33965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 34065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PXwd[] = { 34165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 34265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 34365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pXwd[] = { 34465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 34565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 34665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 34765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar *substitutes[] = { 34865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_PNd, /* \D */ 34965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_pNd, /* \d */ 35065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_PXsp, /* \S */ /* Xsp is Perl space, but from 8.34, Perl */ 35165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_pXsp, /* \s */ /* space and POSIX space are the same. */ 35265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_PXwd, /* \W */ 35365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_pXwd /* \w */ 35465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}; 35565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 35665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* The POSIX class substitutes must be in the order of the POSIX class names, 35765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdefined above, and there are both positive and negative cases. NULL means no 35865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichgeneral substitute of a Unicode property escape (\p or \P). However, for some 35965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPOSIX classes (e.g. graph, print, punct) a special property code is compiled 36065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdirectly. 
*/ 36165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 36265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pL[] = { 36365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 36465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 36565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pLl[] = { 36665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 36765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 36865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pLu[] = { 36965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 37065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 37165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pXan[] = { 37265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 37365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 37465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_h[] = { 37565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_h, '\0' }; 37665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_pXps[] = { 37765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 37865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 37965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PL[] = { 38065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_P, 
CHAR_LEFT_CURLY_BRACKET, 38165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 38265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PLl[] = { 38365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 38465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 38565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PLu[] = { 38665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 38765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 38865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PXan[] = { 38965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 39065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 39165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_H[] = { 39265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_H, '\0' }; 39365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar string_PXps[] = { 39465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 39565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 39665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 39765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar *posix_substitutes[] = { 39865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_pL, /* alpha */ 39965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_pLl, /* lower */ 40065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_pLu, /* upper */ 
40165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_pXan, /* alnum */ 40265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NULL, /* ascii */ 40365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_h, /* blank */ 40465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NULL, /* cntrl */ 40565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_pNd, /* digit */ 40665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NULL, /* graph */ 40765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NULL, /* print */ 40865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NULL, /* punct */ 40965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_pXps, /* space */ /* Xps is POSIX space, but from 8.34 */ 41065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_pXwd, /* word */ /* Perl and POSIX space are the same */ 41165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NULL, /* xdigit */ 41265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Negated cases */ 41365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_PL, /* ^alpha */ 41465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_PLl, /* ^lower */ 41565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_PLu, /* ^upper */ 41665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_PXan, /* ^alnum */ 41765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NULL, /* ^ascii */ 41865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_H, /* ^blank */ 41965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NULL, /* ^cntrl */ 42065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_PNd, /* ^digit */ 42165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NULL, /* ^graph */ 42265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NULL, /* ^print */ 42365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NULL, /* ^punct */ 42465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_PXps, /* ^space */ /* Xps is 
POSIX space, but from 8.34 */ 42565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string_PXwd, /* ^word */ /* Perl and POSIX space are the same */ 42665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NULL /* ^xdigit */ 42765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}; 42865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *)) 42965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 43065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 43165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define STRING(a) # a 43265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define XSTRING(s) STRING(s) 43365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 43465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* The texts of compile-time error messages. These are "char *" because they 43565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichare passed to the outside world. Do not ever re-use any error number, because 43665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthey are documented. Always add a new error instead. Messages marked DEAD below 43765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichare no longer used. This used to be a table of strings, but in order to reduce 43865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe number of relocations needed when a shared library is loaded dynamically, 43965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichit is now one long string. We cannot use a table of offsets, because the 44065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we 44165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsimply count through to the one we want - this isn't a performance issue 44265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause these strings are used only when there is a compilation error. 
44365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 44465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichEach substring ends with \0 to insert a null character. This includes the final 44565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsubstring, so that the whole string ends with \0\0, which can be detected when 44665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcounting through. */ 44765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 44865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const char error_texts[] = 44965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "no error\0" 45065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "\\ at end of pattern\0" 45165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "\\c at end of pattern\0" 45265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "unrecognized character follows \\\0" 45365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "numbers out of order in {} quantifier\0" 45465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 5 */ 45565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "number too big in {} quantifier\0" 45665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "missing terminating ] for character class\0" 45765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "invalid escape sequence in character class\0" 45865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "range out of order in character class\0" 45965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "nothing to repeat\0" 46065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 10 */ 46165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "operand of unlimited repeat could match the empty string\0" /** DEAD **/ 46265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "internal error: unexpected repeat\0" 46365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "unrecognized character after (? 
or (?-\0" 46465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "POSIX named classes are supported only within a class\0" 46565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "missing )\0" 46665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 15 */ 46765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "reference to non-existent subpattern\0" 46865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "erroffset passed as NULL\0" 46965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "unknown option bit(s) set\0" 47065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "missing ) after comment\0" 47165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "parentheses nested too deeply\0" /** DEAD **/ 47265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 20 */ 47365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "regular expression is too large\0" 47465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "failed to get memory\0" 47565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "unmatched parentheses\0" 47665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "internal error: code overflow\0" 47765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "unrecognized character after (?<\0" 47865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 25 */ 47965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "lookbehind assertion is not fixed length\0" 48065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "malformed number or name after (?(\0" 48165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "conditional group contains more than two branches\0" 48265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "assertion expected after (?(\0" 48365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "(?R or (?[+-]digits must be followed by )\0" 48465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 30 */ 48565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "unknown POSIX class name\0" 
48665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "POSIX collating elements are not supported\0" 48765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "this version of PCRE is compiled without UTF support\0" 48865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "spare error\0" /** DEAD **/ 48965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "character value in \\x{} or \\o{} is too large\0" 49065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 35 */ 49165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "invalid condition (?(0)\0" 49265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "\\C not allowed in lookbehind assertion\0" 49365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0" 49465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "number after (?C is > 255\0" 49565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "closing ) for (?C expected\0" 49665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 40 */ 49765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "recursive call could loop indefinitely\0" 49865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "unrecognized character after (?P\0" 49965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "syntax error in subpattern name (missing terminator)\0" 50065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "two named subpatterns have the same name\0" 50165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "invalid UTF-8 string\0" 50265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 45 */ 50365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "support for \\P, \\p, and \\X has not been compiled\0" 50465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "malformed \\P or \\p sequence\0" 50565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "unknown property name after \\P or \\p\0" 50665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "subpattern name is too long (maximum 
" XSTRING(MAX_NAME_SIZE) " characters)\0" 50765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0" 50865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 50 */ 50965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "repeated subpattern is too long\0" /** DEAD **/ 51065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0" 51165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "internal error: overran compiling workspace\0" 51265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "internal error: previously-checked referenced subpattern not found\0" 51365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "DEFINE group contains more than one branch\0" 51465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 55 */ 51565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "repeating a DEFINE group is not allowed\0" /** DEAD **/ 51665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "inconsistent NEWLINE options\0" 51765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0" 51865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "a numbered reference must not be zero\0" 51965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" 52065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 60 */ 52165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "(*VERB) not recognized or malformed\0" 52265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "number is too big\0" 52365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "subpattern name expected\0" 52465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "digit expected after (?+\0" 52565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "] is an invalid data character in JavaScript compatibility 
mode\0" 52665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 65 */ 52765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "different names for subpatterns of the same number are not allowed\0" 52865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "(*MARK) must have an argument\0" 52965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "this version of PCRE is not compiled with Unicode property support\0" 53065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "\\c must be followed by an ASCII character\0" 53165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "\\k is not followed by a braced, angle-bracketed, or quoted name\0" 53265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 70 */ 53365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "internal error: unknown opcode in find_fixedlength()\0" 53465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "\\N is not supported in a class\0" 53565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "too many forward references\0" 53665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0" 53765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "invalid UTF-16 string\0" 53865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 75 */ 53965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" 54065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "character value in \\u.... 
sequence is too large\0" 54165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "invalid UTF-32 string\0" 54265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "setting UTF is disabled by the application\0" 54365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "non-hex character in \\x{} (closing brace missing?)\0" 54465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 80 */ 54565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "non-octal character in \\o{} (closing brace missing?)\0" 54665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "missing opening brace after \\o\0" 54765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "parentheses are too deeply nested\0" 54865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "invalid range in character class\0" 54965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "group name must start with a non-digit\0" 55065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 85 */ 55165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "parentheses are too deeply nested (stack check)\0" 55265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "digits missing in \\x{} or \\o{}\0" 55365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ; 55465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 55565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Table to identify digits and hex digits. This is used when compiling 55665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpatterns. Note that the tables in chartables are dependent on the locale, and 55765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmay mark arbitrary characters as digits - but the PCRE compiling code expects 55865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichto handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have 55965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevicha private table here. 
It costs 256 bytes, but it is a lot faster than doing 56065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcharacter value tests (at least in some simple cases I timed), and in some 56165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichapplications one wants PCRE to compile efficiently as well as match 56265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichefficiently. 56365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 56465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichFor convenience, we use the same bit definitions as in chartables: 56565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 56665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x04 decimal digit 56765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x08 hexadecimal digit 56865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 56965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThen we can use ctype_digit and ctype_xdigit in the code. */ 57065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 57165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Using a simple comparison for decimal numbers rather than a memory read 57265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichis much faster, and the resulting code is simpler (the compiler turns it 57365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichinto a subtraction and unsigned comparison). */ 57465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 57565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9) 57665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 57765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC 57865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 57965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in 58065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichUTF-8 mode. 
*/ 58165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 58265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 digitab[] = 58365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 58465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ 58565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ 58665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ 58765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ 58865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */ 58965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */ 59065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 */ 59165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? 
*/ 59265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* @ - G */ 59365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H - O */ 59465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* P - W */ 59565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* X - _ */ 59665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* ` - g */ 59765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h - o */ 59865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p - w */ 59965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* x -127 */ 60065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ 60165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ 60265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ 60365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ 60465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ 60565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ 60665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ 60765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ 60865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ 60965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ 61065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ 61165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ 61265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ 61365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ 61465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ 61565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ 61665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 61765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else 61865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 61965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. 
*/ 62065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 62165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 digitab[] = 62265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 62365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */ 62465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ 62565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 10 */ 62665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ 62765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 32- 39 20 */ 62865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */ 62965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 30 */ 63065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */ 63165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 40 */ 63265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 72- | */ 63365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 50 */ 63465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- 95 */ 63565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 60 */ 63665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ? 
*/ 63765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */ 63865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */ 63965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g 80 */ 64065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */ 64165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p 90 */ 64265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */ 64365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x A0 */ 64465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */ 64565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 B0 */ 64665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ 64765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* { - G C0 */ 64865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */ 64965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* } - P D0 */ 65065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */ 65165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* \ - X E0 */ 65265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */ 65365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 F0 */ 65465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ 65565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 65665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */ 65765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 0- 7 */ 65865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */ 65965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */ 66065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ 66165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 32- 39 */ 66265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */ 66365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 */ 66465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */ 66565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 */ 66665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /* 72- | */ 66765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 */ 66865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- 95 */ 66965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 */ 67065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ? 
*/ 67165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */ 67265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */ 67365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g */ 67465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */ 67565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p */ 67665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */ 67765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x */ 67865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */ 67965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 */ 68065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ 68165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* { - G */ 68265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */ 68365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* } - P */ 68465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */ 68565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* \ - X */ 68665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */ 68765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ 68865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ 68965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 69065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 69165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 69265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This table is used to check whether auto-possessification is possible 69365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbetween adjacent character-type opcodes. The left-hand (repeated) opcode is 69465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichused to select the row, and the right-hand opcode is use to select the column. 69565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichA value of 1 means that auto-possessification is OK. For example, the second 69665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvalue in the first row means that \D+\d can be turned into \D++\d. 69765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 69865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThe Unicode property types (\P and \p) have to be present to fill out the table 69965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause of what their opcode values are, but the table values should always be 70065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichzero because property types are handled separately in the code. The last four 70165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcolumns apply to items that cannot be repeated, so there is no need to have 70265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is 70365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. 
*/ 70465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 70565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1) 70665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1) 70765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 70865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 autoposstab[APTROWS][APTCOLS] = { 70965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* \D \d \S \s \W \w . .+ \C \P \p \R \H \h \V \v \X \Z \z $ $M */ 71065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \D */ 71165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \d */ 71265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \S */ 71365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \s */ 71465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \W */ 71565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1 }, /* \w */ 71665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* . 
*/ 71765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* .+ */ 71865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 }, /* \C */ 71965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \P */ 72065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* \p */ 72165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \R */ 72265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0 }, /* \H */ 72365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \h */ 72465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0 }, /* \V */ 72565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0 }, /* \v */ 72665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0 } /* \X */ 72765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}; 72865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 72965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 73065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This table is used to check whether auto-possessification is possible 73165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbetween adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). 
The 73265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichleft-hand (repeated) opcode is used to select the row, and the right-hand 73365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichopcode is used to select the column. The values are as follows: 73465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 73565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0 Always return FALSE (never auto-possessify) 73665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 1 Character groups are distinct (possessify if both are OP_PROP) 73765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 2 Check character categories in the same group (general or particular) 73865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 3 TRUE if the two opcodes are not the same (PROP vs NOTPROP) 73965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 74065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 4 Check left general category vs right particular category 74165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 5 Check right general category vs left particular category 74265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 74365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 6 Left alphanum vs right general category 74465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 7 Left space vs right general category 74565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 8 Left word vs right general category 74665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 74765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 9 Right alphanum vs left general category 74865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 10 Right space vs left general category 74965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 11 Right word vs left general category 75065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 75165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 12 Left alphanum vs right particular category 75265de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich 13 Left space vs right particular category 75365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 14 Left word vs right particular category 75465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 75565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 15 Right alphanum vs left particular category 75665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 16 Right space vs left particular category 75765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 17 Right word vs left particular category 75865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 75965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 76065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 propposstab[PT_TABSIZE][PT_TABSIZE] = { 76165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* ANY LAMP GC PC SC ALNUM SPACE PXSPACE WORD CLIST UCNC */ 76265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */ 76365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0 }, /* PT_LAMP */ 76465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0 }, /* PT_GC */ 76565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0 }, /* PT_PC */ 76665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ 76765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0 }, /* PT_ALNUM */ 76865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_SPACE */ 76965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_PXSPACE */ 77065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0 }, /* PT_WORD */ 77165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ 
77265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 } /* PT_UCNC */ 77365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}; 77465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 77565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This table is used to check whether auto-possessification is possible 77665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbetween adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one 77765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichspecifies a general category and the other specifies a particular category. The 77865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrow is selected by the general category and the column by the particular 77965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcategory. The value is 1 if the particular category is not part of the general 78065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcategory. */ 78165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 78265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 catposstab[7][30] = { 78365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */ 78465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* C */ 78565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* L */ 78665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* M */ 78765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, /* N */ 78865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 }, /* P */ 78965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 }, /* S */ 79065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 } /* Z */ 79165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}; 79265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 79365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against 79465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevicha general or particular category. The properties in each row are those 79565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthat apply to the character set in question. Duplication means that a little 79665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichunnecessary work is done when checking, but this keeps things much simpler 79765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause they can all use the same code. For more details see the comment where 79865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthis table is used. 79965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 80065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichNote: SPACE and PXSPACE used to be different because Perl excluded VT from 80165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich"space", but from Perl 5.18 it's included, so both categories are treated the 80265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsame here. 
*/ 80365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 80465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 posspropstab[3][4] = { 80565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { ucp_L, ucp_N, ucp_N, ucp_Nl }, /* ALNUM, 3rd and 4th values redundant */ 80665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { ucp_Z, ucp_Z, ucp_C, ucp_Cc }, /* SPACE and PXSPACE, 2nd value redundant */ 80765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { ucp_L, ucp_N, ucp_P, ucp_Po } /* WORD */ 80865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}; 80965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 81065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This table is used when converting repeating opcodes into possessified 81165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichversions as a result of an explicit possessive quantifier such as ++. A zero 81265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvalue means there is no possessified version - in those cases the item in 81365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichquestion must be wrapped in ONCE brackets. The table is truncated at OP_CALLOUT 81465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause all relevant opcodes are less than that. 
*/ 81565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 81665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uint8 opcode_possessify[] = { 81765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 15 */ 81865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16 - 31 */ 81965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 82065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, /* NOTI */ 82165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_POSSTAR, 0, /* STAR, MINSTAR */ 82265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_POSPLUS, 0, /* PLUS, MINPLUS */ 82365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_POSQUERY, 0, /* QUERY, MINQUERY */ 82465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_POSUPTO, 0, /* UPTO, MINUPTO */ 82565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, /* EXACT */ 82665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 0, 0, /* POS{STAR,PLUS,QUERY,UPTO} */ 82765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 82865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_POSSTARI, 0, /* STARI, MINSTARI */ 82965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_POSPLUSI, 0, /* PLUSI, MINPLUSI */ 83065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_POSQUERYI, 0, /* QUERYI, MINQUERYI */ 83165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_POSUPTOI, 0, /* UPTOI, MINUPTOI */ 83265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, /* EXACTI */ 83365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 0, 0, /* POS{STARI,PLUSI,QUERYI,UPTOI} */ 83465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 83565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_NOTPOSSTAR, 0, /* NOTSTAR, NOTMINSTAR */ 83665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_NOTPOSPLUS, 0, /* NOTPLUS, NOTMINPLUS */ 
83765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_NOTPOSQUERY, 0, /* NOTQUERY, NOTMINQUERY */ 83865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_NOTPOSUPTO, 0, /* NOTUPTO, NOTMINUPTO */ 83965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, /* NOTEXACT */ 84065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 0, 0, /* NOTPOS{STAR,PLUS,QUERY,UPTO} */ 84165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 84265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_NOTPOSSTARI, 0, /* NOTSTARI, NOTMINSTARI */ 84365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_NOTPOSPLUSI, 0, /* NOTPLUSI, NOTMINPLUSI */ 84465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_NOTPOSQUERYI, 0, /* NOTQUERYI, NOTMINQUERYI */ 84565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_NOTPOSUPTOI, 0, /* NOTUPTOI, NOTMINUPTOI */ 84665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, /* NOTEXACTI */ 84765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 0, 0, /* NOTPOS{STARI,PLUSI,QUERYI,UPTOI} */ 84865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 84965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_TYPEPOSSTAR, 0, /* TYPESTAR, TYPEMINSTAR */ 85065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_TYPEPOSPLUS, 0, /* TYPEPLUS, TYPEMINPLUS */ 85165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_TYPEPOSQUERY, 0, /* TYPEQUERY, TYPEMINQUERY */ 85265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_TYPEPOSUPTO, 0, /* TYPEUPTO, TYPEMINUPTO */ 85365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, /* TYPEEXACT */ 85465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 0, 0, /* TYPEPOS{STAR,PLUS,QUERY,UPTO} */ 85565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 85665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_CRPOSSTAR, 0, /* CRSTAR, CRMINSTAR */ 85765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_CRPOSPLUS, 0, /* CRPLUS, CRMINPLUS */ 
85865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_CRPOSQUERY, 0, /* CRQUERY, CRMINQUERY */ 85965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_CRPOSRANGE, 0, /* CRRANGE, CRMINRANGE */ 86065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 0, 0, /* CRPOS{STAR,PLUS,QUERY,RANGE} */ 86165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 86265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, 0, /* CLASS, NCLASS, XCLASS */ 86365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, /* REF, REFI */ 86465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0, /* DNREF, DNREFI */ 86565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0, 0 /* RECURSE, CALLOUT */ 86665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich}; 86765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 86865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 86965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 87065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 87165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Find an error text * 87265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 87365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 87465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* The error texts are now all in one long string, to save on relocations. As 87565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsome of the text is of unknown length, we can't use a table of offsets. 87665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichInstead, just count through the strings. This is not a performance issue 87765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause it happens only when there has been a compilation error. 
87865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 87965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArgument: the error number 88065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: pointer to the error string 88165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 88265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 88365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const char * 88465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfind_error_text(int n) 88565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 88665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst char *s = error_texts; 88765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (; n > 0; n--) 88865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 88965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*s++ != CHAR_NULL) {}; 89065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*s == CHAR_NULL) return "Error text not found (please report)"; 89165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 89265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn s; 89365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 89465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 89565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 89665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 89765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 89865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Expand the workspace * 89965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 90065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 90165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called during the second compiling phase, if the number of 90265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichforward references fills the existing workspace, which is originally a block on 
90365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe stack. A larger block is obtained from malloc() unless the ultimate limit 90465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichhas been reached or the increase will be rather small. 90565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 90665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArgument: pointer to the compile data block 90765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: 0 if all went well, else an error number 90865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 90965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 91065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int 91165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichexpand_workspace(compile_data *cd) 91265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 91365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *newspace; 91465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint newsize = cd->workspace_size * 2; 91565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 91665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (newsize > COMPILE_WORK_SIZE_MAX) newsize = COMPILE_WORK_SIZE_MAX; 91765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->workspace_size >= COMPILE_WORK_SIZE_MAX || 91865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich newsize - cd->workspace_size < WORK_SIZE_SAFETY_MARGIN) 91965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return ERR72; 92065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 92165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnewspace = (PUBL(malloc))(IN_UCHARS(newsize)); 92265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (newspace == NULL) return ERR21; 92365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmemcpy(newspace, cd->start_workspace, cd->workspace_size * sizeof(pcre_uchar)); 92465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->hwm = (pcre_uchar *)newspace + (cd->hwm - 
cd->start_workspace); 92565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->workspace_size > COMPILE_WORK_SIZE) 92665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (PUBL(free))((void *)cd->start_workspace); 92765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->start_workspace = newspace; 92865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->workspace_size = newsize; 92965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn 0; 93065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 93165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 93265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 93365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 93465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 93565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Check for counted repeat * 93665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 93765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 93865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called when a '{' is encountered in a place where it might 93965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstart a quantifier. It looks ahead to see if it really is a quantifier or not. 94065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichIt is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd} 94165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhere the ddds are digits. 
94265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 94365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 94465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p pointer to the first char after '{' 94565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 94665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: TRUE or FALSE 94765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 94865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 94965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL 95065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichis_counted_repeat(const pcre_uchar *p) 95165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 95265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (!IS_DIGIT(*p)) return FALSE; 95365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichp++; 95465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (IS_DIGIT(*p)) p++; 95565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE; 95665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 95765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (*p++ != CHAR_COMMA) return FALSE; 95865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE; 95965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 96065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (!IS_DIGIT(*p)) return FALSE; 96165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichp++; 96265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (IS_DIGIT(*p)) p++; 96365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 96465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn (*p == CHAR_RIGHT_CURLY_BRACKET); 96565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 96665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 96765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 96865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
96965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 97065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Handle escapes * 97165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 97265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 97365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called when a \ has been encountered. It either returns a 97465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpositive value for a simple escape such as \n, or 0 for a data character which 97565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwill be placed in chptr. A backreference to group n is returned as negative n. 97665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichWhen UTF-8 is enabled, a positive value greater than 255 may be returned in 97765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichchptr. On entry, ptr is pointing at the \. On exit, it is on the final 97865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcharacter of the escape sequence. 
97965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 98065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 98165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptrptr points to the pattern position pointer 98265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich chptr points to a returned data character 98365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcodeptr points to the errorcode variable 98465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bracount number of previous extracting brackets 98565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich options the options bits 98665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich isclass TRUE if inside a character class 98765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 98865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: zero => a data character 98965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich positive => a special escape sequence 99065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich negative => a back reference 99165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich on error, errorcodeptr is set 99265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 99365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 99465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int 99565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcheck_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr, 99665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int bracount, int options, BOOL isclass) 99765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 99865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* PCRE_UTF16 has the same value as PCRE_UTF8. 
*/ 99965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL utf = (options & PCRE_UTF8) != 0; 100065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *ptr = *ptrptr + 1; 100165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 c; 100265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint escape = 0; 100365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint i; 100465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 100565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichGETCHARINCTEST(c, ptr); /* Get character value, increment pointer */ 100665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichptr--; /* Set pointer back to the last byte */ 100765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 100865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If backslash is at the end of the pattern, it's an error. */ 100965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 101065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (c == CHAR_NULL) *errorcodeptr = ERR1; 101165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 101265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Non-alphanumerics are literals. For digits or letters, do an initial lookup 101365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichin a table. A non-zero result is something that can be returned immediately. 101465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichOtherwise further processing may be required. 
*/ 101565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 101665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC /* ASCII/UTF-8 coding */ 101765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Not alphanumeric */ 101865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse if (c < CHAR_0 || c > CHAR_z) {} 101965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse if ((i = escapes[c - CHAR_0]) != 0) 102065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { if (i > 0) c = (pcre_uint32)i; else escape = -i; } 102165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 102265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else /* EBCDIC coding */ 102365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Not alphanumeric */ 102465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {} 102565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse if ((i = escapes[c - 0x48]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; } 102665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 102765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 102865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Escapes that need further processing, or are illegal. 
*/ 102965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 103065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse 103165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 103265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *oldptr; 103365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL braced, negated, overflow; 103465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int s; 103565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 103665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch (c) 103765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 103865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* A number of Perl escapes are not handled by PCRE. We give an explicit 103965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich error. */ 104065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 104165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_l: 104265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_L: 104365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR37; 104465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 104565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 104665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_u: 104765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_JAVASCRIPT_COMPAT) != 0) 104865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 104965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In JavaScript, \u must be followed by four hexadecimal numbers. 105065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Otherwise it is a lowercase u letter. 
*/ 105165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0 105265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0 105365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich && MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0 105465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich && MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0) 105565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 105665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = 0; 105765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (i = 0; i < 4; ++i) 105865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 105965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich register pcre_uint32 cc = *(++ptr); 106065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC /* ASCII/UTF-8 coding */ 106165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ 106265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); 106365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else /* EBCDIC coding */ 106465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */ 106565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); 106665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 106765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 106865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 106965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8 107065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c > (utf ? 
0x10ffffU : 0xffU)) 107165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16 107265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c > (utf ? 0x10ffffU : 0xffffU)) 107365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32 107465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && c > 0x10ffffU) 107565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 107665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 107765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR76; 107865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 107965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; 108065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 108165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 108265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 108365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR37; 108465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 108565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 108665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_U: 108765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In JavaScript, \U is an uppercase U letter. */ 108865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) *errorcodeptr = ERR37; 108965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 109065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 109165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In a character class, \g is just a literal "g". 
Outside a character 109265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class, \g must be followed by one of a number of specific things: 109365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 109465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (1) A number, either plain or braced. If positive, it is an absolute 109565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich backreference. If negative, it is a relative backreference. This is a Perl 109665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 5.10 feature. 109765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 109865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (2) Perl 5.10 also supports \g{name} as a reference to a named group. This 109965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is part of Perl's movement towards a unified syntax for back references. As 110065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich this is synonymous with \k{name}, we fudge it up by pretending it really 110165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich was \k. 110265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 110365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (3) For Oniguruma compatibility we also support \g followed by a name or a 110465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich number either in angle brackets or in single quotes. However, these are 110565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (possibly recursive) subroutine calls, _not_ backreferences. Just return 110665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the ESC_g code (cf \k). 
*/ 110765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 110865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_g: 110965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (isclass) break; 111065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE) 111165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 111265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich escape = ESC_g; 111365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 111465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 111565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 111665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle the Perl-compatible cases */ 111765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 111865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) 111965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 112065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *p; 112165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (p = ptr+2; *p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET; p++) 112265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break; 112365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET) 112465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 112565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich escape = ESC_k; 112665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 112765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 112865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich braced = TRUE; 112965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 113065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 113165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else braced = FALSE; 113265de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich 113365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[1] == CHAR_MINUS) 113465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 113565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich negated = TRUE; 113665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 113765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 113865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else negated = FALSE; 113965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 114065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* The integer range is limited by the machine's int representation. */ 114165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich s = 0; 114265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich overflow = FALSE; 114365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (IS_DIGIT(ptr[1])) 114465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 114565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (s > INT_MAX / 10 - 1) /* Integer overflow */ 114665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 114765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich overflow = TRUE; 114865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 114965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 115065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich s = s * 10 + (int)(*(++ptr) - CHAR_0); 115165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 115265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (overflow) /* Integer overflow */ 115365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 115465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (IS_DIGIT(ptr[1])) 115565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 115665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR61; 115765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 115865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 
115965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 116065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET) 116165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 116265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR57; 116365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 116465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 116565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 116665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (s == 0) 116765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 116865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR58; 116965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 117065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 117165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 117265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (negated) 117365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 117465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (s > bracount) 117565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 117665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR15; 117765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 117865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 117965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich s = bracount - (s - 1); 118065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 118165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 118265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich escape = -s; 118365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 118465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 118565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* The handling of escape sequences consisting of a string of digits 118665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich starting with one that is not 
zero is not straightforward. Perl has changed 118765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich over the years. Nowadays \g{} for backreferences and \o{} for octal are 118865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recommended to avoid the ambiguities in the old syntax. 118965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 119065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Outside a character class, the digits are read as a decimal number. If the 119165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich number is less than 8 (used to be 10), or if there are that many previous 119265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich extracting left brackets, then it is a back reference. Otherwise, up to 119365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich three octal digits are read to form an escaped byte. Thus \123 is likely to 119465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich be octal 123 (cf \0123, which is octal 012 followed by the literal 3). If 119565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the octal value is greater than 377, the least significant 8 bits are 119665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich taken. \8 and \9 are treated as the literal characters 8 and 9. 119765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 119865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Inside a character class, \ followed by a digit is always either a literal 119965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 8 or 9 or an octal number. 
*/ 120065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 120165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5: 120265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: 120365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 120465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!isclass) 120565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 120665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich oldptr = ptr; 120765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* The integer range is limited by the machine's int representation. */ 120865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich s = (int)(c -CHAR_0); 120965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich overflow = FALSE; 121065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (IS_DIGIT(ptr[1])) 121165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 121265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (s > INT_MAX / 10 - 1) /* Integer overflow */ 121365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 121465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich overflow = TRUE; 121565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 121665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 121765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich s = s * 10 + (int)(*(++ptr) - CHAR_0); 121865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 121965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (overflow) /* Integer overflow */ 122065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 122165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (IS_DIGIT(ptr[1])) 122265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 122365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR61; 122465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 
122565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 122665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (s < 8 || s <= bracount) /* Check for back reference */ 122765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 122865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich escape = -s; 122965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 123065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 123165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr = oldptr; /* Put the pointer back and fall through */ 123265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 123365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 123465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle a digit following \ when the number is not a back reference. If 123565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the first digit is 8 or 9, Perl used to generate a binary zero byte and 123665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich then treat the digit as a following literal. At least by Perl 5.18 this 123765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich changed so as not to insert the binary zero. */ 123865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 123965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((c = *ptr) >= CHAR_8) break; 124065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 124165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through with a digit less than 8 */ 124265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 124365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* \0 always starts an octal number, but we may drop through to here with a 124465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich larger first octal digit. 
The original code used just to take the least 124565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich significant 8 bits of octal numbers (I think this is what early Perls used 124665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode, 124765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich but no more than 3 octal digits. */ 124865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 124965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_0: 125065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c -= CHAR_0; 125165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7) 125265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = c * 8 + *(++ptr) - CHAR_0; 125365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE8 125465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!utf && c > 0xff) *errorcodeptr = ERR51; 125565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 125665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 125765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 125865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* \o is a relatively new Perl feature, supporting a more general way of 125965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich specifying character codes in octal. The only supported form is \o{ddd}. 
*/ 126065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 126165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_o: 126265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR81; else 126365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR86; else 126465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 126565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr += 2; 126665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = 0; 126765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich overflow = FALSE; 126865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*ptr >= CHAR_0 && *ptr <= CHAR_7) 126965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 127065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich register pcre_uint32 cc = *ptr++; 127165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ 127265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE32 127365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c >= 0x20000000l) { overflow = TRUE; break; } 127465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 127565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = (c << 3) + cc - CHAR_0 ; 127665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8 127765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; } 127865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16 127965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c > (utf ? 
0x10ffffU : 0xffffU)) { overflow = TRUE; break; } 128065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32 128165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && c > 0x10ffffU) { overflow = TRUE; break; } 128265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 128365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 128465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (overflow) 128565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 128665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++; 128765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR34; 128865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 128965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (*ptr == CHAR_RIGHT_CURLY_BRACKET) 129065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 129165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; 129265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 129365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else *errorcodeptr = ERR80; 129465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 129565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 129665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 129765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* \x is complicated. In JavaScript, \x must be followed by two hexadecimal 129865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich numbers. Otherwise it is a lowercase x letter. 
*/ 129965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 130065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_x: 130165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_JAVASCRIPT_COMPAT) != 0) 130265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 130365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0 130465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0) 130565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 130665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = 0; 130765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (i = 0; i < 2; ++i) 130865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 130965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich register pcre_uint32 cc = *(++ptr); 131065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC /* ASCII/UTF-8 coding */ 131165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ 131265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); 131365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else /* EBCDIC coding */ 131465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */ 131565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); 131665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 131765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 131865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 131965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } /* End JavaScript handling */ 132065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 132165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle \x in Perl's style. 
\x{ddd} is a character number which can be 132265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich greater than 0xff in utf or non-8bit mode, but only if the ddd are hex 132365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich digits. If not, { used to be treated as a data character. However, Perl 132465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich seems to read hex digits up to the first non-such, and ignore the rest, so 132565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich that, for example \x{zz} matches a binary zero. This seems crazy, so PCRE 132665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich now gives an error. */ 132765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 132865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 132965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 133065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) 133165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 133265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr += 2; 133365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr == CHAR_RIGHT_CURLY_BRACKET) 133465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 133565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR86; 133665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 133765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 133865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = 0; 133965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich overflow = FALSE; 134065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0) 134165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 134265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich register pcre_uint32 cc = *ptr++; 134365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ 
134465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 134565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE32 134665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c >= 0x10000000l) { overflow = TRUE; break; } 134765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 134865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 134965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC /* ASCII/UTF-8 coding */ 135065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ 135165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); 135265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else /* EBCDIC coding */ 135365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */ 135465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); 135565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 135665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 135765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8 135865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; } 135965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16 136065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c > (utf ? 
0x10ffffU : 0xffffU)) { overflow = TRUE; break; } 136165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32 136265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && c > 0x10ffffU) { overflow = TRUE; break; } 136365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 136465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 136565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 136665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (overflow) 136765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 136865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (MAX_255(*ptr) && (digitab[*ptr] & ctype_xdigit) != 0) ptr++; 136965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR34; 137065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 137165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 137265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (*ptr == CHAR_RIGHT_CURLY_BRACKET) 137365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 137465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; 137565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 137665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 137765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the sequence of hex digits does not end with '}', give an error. 137865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich We used just to recognize this construct and fall through to the normal 137965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich \x handling, but nowadays Perl gives an error, which seems much more 138065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich sensible, so we do too. 
*/ 138165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 138265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else *errorcodeptr = ERR79; 138365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } /* End of \x{} processing */ 138465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 138565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Read a single-byte hex-defined char (up to two hex digits after \x) */ 138665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 138765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 138865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 138965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = 0; 139065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0) 139165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 139265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 cc; /* Some compilers don't like */ 139365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc = *(++ptr); /* ++ in initializers */ 139465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC /* ASCII/UTF-8 coding */ 139565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ 139665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10)); 139765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else /* EBCDIC coding */ 139865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cc <= CHAR_z) cc += 64; /* Convert to upper case */ 139965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = c * 16 + cc - ((cc >= CHAR_0)? 
CHAR_0 : (CHAR_A - 10)); 140065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 140165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 140265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } /* End of \xdd handling */ 140365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } /* End of Perl-style \x handling */ 140465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 140565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 140665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For \c, a following letter is upper-cased; then the 0x40 bit is flipped. 140765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich An error is given if the byte following \c is not an ASCII character. This 140865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich coding is ASCII-specific, but then the whole concept of \cx is 140965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ASCII-specific. (However, an EBCDIC equivalent has now been added.) */ 141065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 141165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_c: 141265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *(++ptr); 141365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == CHAR_NULL) 141465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 141565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR2; 141665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 141765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 141865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC /* ASCII/UTF-8 coding */ 141965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c > 127) /* Excludes all non-ASCII in either mode */ 142065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 142165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR68; 142265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 
142365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 142465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c >= CHAR_a && c <= CHAR_z) c -= 32; 142565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c ^= 0x40; 142665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else /* EBCDIC coding */ 142765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c >= CHAR_a && c <= CHAR_z) c += 64; 142865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c ^= 0xC0; 142965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 143065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 143165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 143265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any 143365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich other alphanumeric following \ is an error if PCRE_EXTRA was set; 143465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich otherwise, for Perl compatibility, it is a literal. This code looks a bit 143565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich odd, but there used to be some cases other than the default, and there may 143665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich be again in future, so I haven't "optimized" it. 
*/ 143765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 143865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 143965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_EXTRA) != 0) switch(c) 144065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 144165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 144265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR3; 144365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 144465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 144565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 144665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 144765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 144865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 144965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Perl supports \N{name} for character names, as well as plain \N for "not 145065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnewline". PCRE does not support \N{name}. However, it does support 145165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichquantification such as \N{2,3}. */ 145265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 145365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (escape == ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET && 145465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich !is_counted_repeat(ptr+2)) 145565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR37; 145665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 145765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If PCRE_UCP is set, we change the values for \d etc. 
*/ 145865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 145965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((options & PCRE_UCP) != 0 && escape >= ESC_D && escape <= ESC_w) 146065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich escape += (ESC_DU - ESC_D); 146165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 146265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Set the pointer to the final character before returning. */ 146365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 146465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*ptrptr = ptr; 146565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*chptr = c; 146665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn escape; 146765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 146865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 146965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 147065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 147165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 147265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 147365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Handle \P and \p * 147465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 147565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 147665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called after \P or \p has been encountered, provided that 147765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE is compiled with support for Unicode properties. On entry, ptrptr is 147865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpointing at the P or p. On exit, it is pointing at the final character of the 147965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichescape sequence. 
148065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 148165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArgument: 148265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptrptr points to the pattern position pointer 148365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich negptr points to a boolean that is set TRUE for negation else FALSE 148465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptypeptr points to an unsigned int that is set to the type value 148565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pdataptr points to an unsigned int that is set to the detailed property value 148665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcodeptr points to the error code variable 148765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 148865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: TRUE if the type value was found, or FALSE for an invalid type 148965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 149065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 149165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL 149265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichget_ucp(const pcre_uchar **ptrptr, BOOL *negptr, unsigned int *ptypeptr, 149365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich unsigned int *pdataptr, int *errorcodeptr) 149465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 149565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar c; 149665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint i, bot, top; 149765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *ptr = *ptrptr; 149865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar name[32]; 149965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 150065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichc = *(++ptr); 150165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (c == CHAR_NULL) goto ERROR_RETURN; 150265de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich 150365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*negptr = FALSE; 150465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 150565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* \P or \p can be followed by a name in {}, optionally preceded by ^ for 150665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnegation. */ 150765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 150865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (c == CHAR_LEFT_CURLY_BRACKET) 150965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 151065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT) 151165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 151265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *negptr = TRUE; 151365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 151465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 151565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (i = 0; i < (int)(sizeof(name) / sizeof(pcre_uchar)) - 1; i++) 151665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 151765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *(++ptr); 151865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == CHAR_NULL) goto ERROR_RETURN; 151965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == CHAR_RIGHT_CURLY_BRACKET) break; 152065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich name[i] = c; 152165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 152265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN; 152365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich name[i] = 0; 152465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 152565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 152665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Otherwise there is just one following character */ 152765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
152865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse 152965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 153065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich name[0] = c; 153165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich name[1] = 0; 153265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 153365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 153465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*ptrptr = ptr; 153565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 153665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Search for a recognized property name using binary chop */ 153765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 153865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbot = 0; 153965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichtop = PRIV(utt_size); 154065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 154165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (bot < top) 154265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 154365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int r; 154465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich i = (bot + top) >> 1; 154565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich r = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset); 154665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (r == 0) 154765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 154865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *ptypeptr = PRIV(utt)[i].type; 154965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *pdataptr = PRIV(utt)[i].value; 155065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return TRUE; 155165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 155265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (r > 0) bot = i + 1; else top = i; 155365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 155465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
155565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*errorcodeptr = ERR47; 155665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*ptrptr = ptr; 155765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn FALSE; 155865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 155965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichERROR_RETURN: 156065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*errorcodeptr = ERR46; 156165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*ptrptr = ptr; 156265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn FALSE; 156365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 156465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 156565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 156665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 156765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 156865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 156965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Read repeat counts * 157065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 157165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 157265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Read an item of the form {n,m} and return the values. This is called only 157365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichafter is_counted_repeat() has confirmed that a repeat-count quantifier exists, 157465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichso the syntax is guaranteed to be correct, but we need to check the values. 
157565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 157665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 157765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p pointer to first char after '{' 157865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich minp pointer to int for min 157965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich maxp pointer to int for max 158065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich returned as -1 if no max 158165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcodeptr points to error code variable 158265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 158365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: pointer to '}' on success; 158465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich current ptr on error, with errorcodeptr set non-zero 158565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 158665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 158765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar * 158865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichread_repeat_counts(const pcre_uchar *p, int *minp, int *maxp, int *errorcodeptr) 158965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 159065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint min = 0; 159165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint max = -1; 159265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 159365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (IS_DIGIT(*p)) 159465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 159565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich min = min * 10 + (int)(*p++ - CHAR_0); 159665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (min > 65535) 159765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 159865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR5; 159965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return p; 
160065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 160165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 160265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 160365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else 160465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 160565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*(++p) != CHAR_RIGHT_CURLY_BRACKET) 160665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 160765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich max = 0; 160865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while(IS_DIGIT(*p)) 160965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 161065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich max = max * 10 + (int)(*p++ - CHAR_0); 161165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (max > 65535) 161265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 161365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR5; 161465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return p; 161565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 161665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 161765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (max < min) 161865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 161965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR4; 162065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return p; 162165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 162265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 162365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 162465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 162565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*minp = min; 162665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*maxp = max; 162765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn p; 
162865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 162965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 163065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 163165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 163265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 163365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Find first significant op code * 163465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 163565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 163665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This is called by several functions that scan a compiled expression looking 163765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor a fixed first character, or an anchoring op code etc. It skips over things 163865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthat do not influence this. For some calls, it makes sense to skip negative 163965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichforward and all backward assertions, and also the \b assertion; for others it 164065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdoes not. 
164165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 164265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 164365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code pointer to the start of the group 164465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skipassert TRUE if certain assertions are to be skipped 164565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 164665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: pointer to the first significant opcode 164765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 164865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 164965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar* 165065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfirst_significant_code(const pcre_uchar *code, BOOL skipassert) 165165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 165265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;;) 165365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 165465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch ((int)*code) 165565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 165665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ASSERT_NOT: 165765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ASSERTBACK: 165865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ASSERTBACK_NOT: 165965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!skipassert) return code; 166065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do code += GET(code, 1); while (*code == OP_ALT); 166165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += PRIV(OP_lengths)[*code]; 166265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 166365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 166465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_WORD_BOUNDARY: 166565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_WORD_BOUNDARY: 
166665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!skipassert) return code; 166765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through */ 166865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 166965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CALLOUT: 167065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CREF: 167165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DNCREF: 167265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_RREF: 167365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DNRREF: 167465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DEF: 167565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += PRIV(OP_lengths)[*code]; 167665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 167765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 167865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 167965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return code; 168065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 168165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 168265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Control never reaches here */ 168365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 168465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 168565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 168665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 168765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 168865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Find the fixed length of a branch * 168965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 169065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 169165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Scan a branch and compute the fixed length of subject that will match it, 
169265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif the length is fixed. This is needed for dealing with backward assertions. 169365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichIn UTF8 mode, the result is in characters rather than bytes. The branch is 169465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichtemporarily terminated with OP_END when this function is called. 169565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 169665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThis function is called when a backward assertion is encountered, so that if it 169765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfails, the error message can point to the correct place in the pattern. 169865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichHowever, we cannot do this when the assertion contains subroutine calls, 169965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause they can be forward references. We solve this by remembering this case 170065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichand doing the check at the end; a flag specifies which mode we are running in. 
170165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 170265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 170365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code points to the start of the pattern (the bracket) 170465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich utf TRUE in UTF-8 / UTF-16 / UTF-32 mode 170565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich atend TRUE if called when the pattern is complete 170665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd the "compile data" structure 170765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 170865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: the fixed length, 170965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich or -1 if there is no fixed length, 171065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich or -2 if \C was encountered (in UTF-8 mode only) 171165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich or -3 if an OP_RECURSE item was encountered and atend is FALSE 171265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich or -4 if an unknown opcode was encountered (internal error) 171365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 171465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 171565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int 171665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfind_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd) 171765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 171865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint length = -1; 171965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 172065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister int branchlength = 0; 172165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister pcre_uchar *cc = code + 1 + LINK_SIZE; 172265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 172365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Scan along the opcodes for this branch. 
If we get to the end of the 172465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbranch, check the length against that of the other branches. */ 172565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 172665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;;) 172765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 172865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int d; 172965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *ce, *cs; 173065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich register pcre_uchar op = *cc; 173165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 173265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch (op) 173365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 173465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* We only need to continue for OP_CBRA (normal capturing bracket) and 173565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_BRA (normal non-capturing bracket) because the other variants of these 173665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich opcodes are all concerned with unlimited repeated groups, which of course 173765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich are not of fixed length. */ 173865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 173965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CBRA: 174065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_BRA: 174165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ONCE: 174265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ONCE_NC: 174365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_COND: 174465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich d = find_fixedlength(cc + ((op == OP_CBRA)? 
IMM2_SIZE : 0), utf, atend, cd); 174565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (d < 0) return d; 174665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branchlength += d; 174765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do cc += GET(cc, 1); while (*cc == OP_ALT); 174865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += 1 + LINK_SIZE; 174965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 175065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 175165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Reached end of a branch; if it's a ket it is the end of a nested call. 175265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich If it's ALT it is an alternation in a nested call. An ACCEPT is effectively 175365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich an ALT. If it is END it's the end of the outer call. All can be handled by 175465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the same code. Note that we must not include the OP_KETRxxx opcodes here, 175565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich because they all imply an unlimited repeat. 
*/ 175665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 175765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ALT: 175865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_KET: 175965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_END: 176065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ACCEPT: 176165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ASSERT_ACCEPT: 176265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (length < 0) length = branchlength; 176365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (length != branchlength) return -1; 176465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*cc != OP_ALT) return length; 176565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += 1 + LINK_SIZE; 176665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branchlength = 0; 176765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 176865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 176965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* A true recursion implies not fixed length, but a subroutine call may 177065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich be OK. If the subroutine is a forward reference, we can't deal with 177165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich it until the end of the pattern, so return -3. 
*/ 177265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 177365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_RECURSE: 177465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!atend) return -3; 177565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1); /* Start subpattern */ 177665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */ 177765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cc > cs && cc < ce) return -1; /* Recursion */ 177865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd); 177965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (d < 0) return d; 178065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branchlength += d; 178165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += 1 + LINK_SIZE; 178265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 178365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 178465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Skip over assertive subpatterns */ 178565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 178665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ASSERT: 178765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ASSERT_NOT: 178865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ASSERTBACK: 178965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ASSERTBACK_NOT: 179065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do cc += GET(cc, 1); while (*cc == OP_ALT); 179165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += PRIV(OP_lengths)[*cc]; 179265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 179365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 179465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Skip over things that don't match chars */ 
179565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 179665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MARK: 179765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PRUNE_ARG: 179865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SKIP_ARG: 179965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_THEN_ARG: 180065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += cc[1] + PRIV(OP_lengths)[*cc]; 180165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 180265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 180365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CALLOUT: 180465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CIRC: 180565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CIRCM: 180665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CLOSE: 180765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_COMMIT: 180865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CREF: 180965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DEF: 181065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DNCREF: 181165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DNRREF: 181265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DOLL: 181365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DOLLM: 181465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EOD: 181565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EODN: 181665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_FAIL: 181765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_WORD_BOUNDARY: 181865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PRUNE: 181965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_REVERSE: 182065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_RREF: 182165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 
OP_SET_SOM: 182265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SKIP: 182365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SOD: 182465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SOM: 182565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_THEN: 182665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_WORD_BOUNDARY: 182765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += PRIV(OP_lengths)[*cc]; 182865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 182965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 183065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle literal characters */ 183165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 183265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHAR: 183365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHARI: 183465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT: 183565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTI: 183665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branchlength++; 183765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += 2; 183865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF 183965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 184065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 184165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 184265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 184365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle exact repetitions. The count is already in characters, but we 184465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich need to skip over a multibyte character in UTF8 mode. 
*/ 184565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 184665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXACT: 184765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXACTI: 184865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTEXACT: 184965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTEXACTI: 185065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branchlength += (int)GET2(cc,1); 185165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += 2 + IMM2_SIZE; 185265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF 185365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 185465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 185565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 185665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 185765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEEXACT: 185865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branchlength += GET2(cc,1); 185965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP) 186065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += 2; 186165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += 1 + IMM2_SIZE + 1; 186265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 186365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 186465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle single-char matchers */ 186565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 186665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PROP: 186765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPROP: 186865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += 2; 186965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through */ 187065de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich 187165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_HSPACE: 187265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_VSPACE: 187365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_HSPACE: 187465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_VSPACE: 187565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_DIGIT: 187665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DIGIT: 187765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_WHITESPACE: 187865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_WHITESPACE: 187965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_WORDCHAR: 188065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_WORDCHAR: 188165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ANY: 188265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ALLANY: 188365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branchlength++; 188465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc++; 188565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 188665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 188765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* The single-byte matcher isn't allowed. This only happens in UTF-8 mode; 188865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich otherwise \C is coded as OP_ALLANY. 
*/ 188965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 189065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ANYBYTE: 189165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return -2; 189265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 189365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check a class for variable quantification */ 189465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 189565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CLASS: 189665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NCLASS: 189765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32 189865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_XCLASS: 189965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* The original code caused an unsigned overflow in 64 bit systems, 190065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich so now we use a conditional statement. 
*/ 190165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (op == OP_XCLASS) 190265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += GET(cc, 1); 190365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 190465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += PRIV(OP_lengths)[OP_CLASS]; 190565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else 190665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += PRIV(OP_lengths)[OP_CLASS]; 190765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 190865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 190965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch (*cc) 191065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 191165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRSTAR: 191265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINSTAR: 191365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPLUS: 191465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINPLUS: 191565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRQUERY: 191665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINQUERY: 191765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPOSSTAR: 191865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPOSPLUS: 191965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPOSQUERY: 192065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return -1; 192165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 192265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRRANGE: 192365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINRANGE: 192465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPOSRANGE: 192565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1; 192665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branchlength += (int)GET2(cc,1); 
192765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += 1 + 2 * IMM2_SIZE; 192865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 192965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 193065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 193165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branchlength++; 193265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 193365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 193465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 193565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Anything else is variable length */ 193665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 193765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ANYNL: 193865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_BRAMINZERO: 193965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_BRAPOS: 194065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_BRAPOSZERO: 194165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_BRAZERO: 194265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CBRAPOS: 194365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXTUNI: 194465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_KETRMAX: 194565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_KETRMIN: 194665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_KETRPOS: 194765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINPLUS: 194865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINPLUSI: 194965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINQUERY: 195065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINQUERYI: 195165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINSTAR: 195265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINSTARI: 195365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINUPTO: 
195465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINUPTOI: 195565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINPLUS: 195665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINPLUSI: 195765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINQUERY: 195865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINQUERYI: 195965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINSTAR: 196065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINSTARI: 196165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINUPTO: 196265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINUPTOI: 196365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPLUS: 196465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPLUSI: 196565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSPLUS: 196665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSPLUSI: 196765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSQUERY: 196865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSQUERYI: 196965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSSTAR: 197065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSSTARI: 197165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSUPTO: 197265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSUPTOI: 197365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTQUERY: 197465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTQUERYI: 197565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTSTAR: 197665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTSTARI: 197765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTUPTO: 197865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTUPTOI: 
197965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUS: 198065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUSI: 198165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSPLUS: 198265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSPLUSI: 198365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSQUERY: 198465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSQUERYI: 198565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSSTAR: 198665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSSTARI: 198765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSUPTO: 198865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSUPTOI: 198965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_QUERY: 199065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_QUERYI: 199165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_REF: 199265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_REFI: 199365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DNREF: 199465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DNREFI: 199565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SBRA: 199665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SBRAPOS: 199765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SCBRA: 199865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SCBRAPOS: 199965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SCOND: 200065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SKIPZERO: 200165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_STAR: 200265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_STARI: 200365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINPLUS: 200465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINQUERY: 
200565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINSTAR: 200665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINUPTO: 200765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPLUS: 200865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSPLUS: 200965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSQUERY: 201065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSSTAR: 201165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSUPTO: 201265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEQUERY: 201365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPESTAR: 201465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEUPTO: 201565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_UPTO: 201665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_UPTOI: 201765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return -1; 201865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 201965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Catch unrecognized opcodes so that when new ones are added they 202065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich are not forgotten, as has happened in the past. 
*/ 202165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 202265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 202365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return -4; 202465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 202565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 202665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Control never gets here */ 202765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 202865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 202965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 203065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 203165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 203265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Scan compiled regex for specific bracket * 203365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 203465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 203565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This little function scans through a compiled pattern until it finds a 203665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcapturing bracket with the given number, or, if the number is negative, an 203765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichinstance of OP_REVERSE for a lookbehind. The function is global in the C sense 203865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichso that it can be called from pcre_study() when finding the minimum matching 203965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlength. 
204065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 204165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 204265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code points to start of expression 204365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich utf TRUE in UTF-8 / UTF-16 / UTF-32 mode 204465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich number the required bracket number or negative to find a lookbehind 204565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 204665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: pointer to the opcode for the bracket, or NULL if not found 204765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 204865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 204965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar * 205065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPRIV(find_bracket)(const pcre_uchar *code, BOOL utf, int number) 205165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 205265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;;) 205365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 205465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich register pcre_uchar c = *code; 205565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 205665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_END) return NULL; 205765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 205865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* XCLASS is used for classes that cannot be represented just by a bit 205965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich map. This includes negated single high-valued characters. The length in 206065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the table is zero; the actual length is stored in the compiled code. 
*/ 206165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 206265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_XCLASS) code += GET(code, 1); 206365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 206465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle recursion */ 206565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 206665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (c == OP_REVERSE) 206765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 206865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (number < 0) return (pcre_uchar *)code; 206965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += PRIV(OP_lengths)[c]; 207065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 207165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 207265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle capturing bracket */ 207365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 207465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (c == OP_CBRA || c == OP_SCBRA || 207565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c == OP_CBRAPOS || c == OP_SCBRAPOS) 207665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 207765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int n = (int)GET2(code, 1+LINK_SIZE); 207865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (n == number) return (pcre_uchar *)code; 207965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += PRIV(OP_lengths)[c]; 208065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 208165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 208265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Otherwise, we can get the item's length from the table, except that for 208365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeated character types, we have to test for \p and \P, which have an extra 208465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich two bytes of parameters, and for 
MARK/PRUNE/SKIP/THEN with an argument, we 208565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich must add in its length. */ 208665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 208765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 208865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 208965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(c) 209065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 209165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPESTAR: 209265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINSTAR: 209365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPLUS: 209465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINPLUS: 209565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEQUERY: 209665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINQUERY: 209765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSSTAR: 209865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSPLUS: 209965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSQUERY: 210065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; 210165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 210265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 210365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEUPTO: 210465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINUPTO: 210565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEEXACT: 210665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSUPTO: 210765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) 210865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 2; 210965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 
211065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 211165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MARK: 211265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PRUNE_ARG: 211365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SKIP_ARG: 211465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_THEN_ARG: 211565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += code[1]; 211665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 211765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 211865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 211965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Add in the fixed length from the table */ 212065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 212165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += PRIV(OP_lengths)[c]; 212265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 212365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In UTF-8 mode, opcodes that are followed by a character may be followed by 212465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich a multi-byte character. The length in the table is a minimum, so we have to 212565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich arrange to skip the extra bytes. 
*/ 212665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 212765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 212865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf) switch(c) 212965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 213065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHAR: 213165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHARI: 213265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXACT: 213365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXACTI: 213465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_UPTO: 213565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_UPTOI: 213665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINUPTO: 213765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINUPTOI: 213865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSUPTO: 213965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSUPTOI: 214065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_STAR: 214165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_STARI: 214265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINSTAR: 214365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINSTARI: 214465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSSTAR: 214565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSSTARI: 214665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUS: 214765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUSI: 214865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINPLUS: 214965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINPLUSI: 215065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSPLUS: 215165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSPLUSI: 
215265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_QUERY: 215365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_QUERYI: 215465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINQUERY: 215565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINQUERYI: 215665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSQUERY: 215765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSQUERYI: 215865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); 215965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 216065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 216165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else 216265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (void)(utf); /* Keep compiler happy by referencing function argument */ 216365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 216465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 216565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 216665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 216765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 216865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 216965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 217065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 217165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Scan compiled regex for recursion reference * 217265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 217365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 217465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This little function scans through a compiled pattern until it finds an 217565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichinstance of OP_RECURSE. 
217665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 217765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 217865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code points to start of expression 217965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich utf TRUE in UTF-8 / UTF-16 / UTF-32 mode 218065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 218165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: pointer to the opcode for OP_RECURSE, or NULL if not found 218265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 218365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 218465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar * 218565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfind_recurse(const pcre_uchar *code, BOOL utf) 218665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 218765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;;) 218865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 218965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich register pcre_uchar c = *code; 219065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_END) return NULL; 219165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_RECURSE) return code; 219265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 219365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* XCLASS is used for classes that cannot be represented just by a bit 219465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich map. This includes negated single high-valued characters. The length in 219565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the table is zero; the actual length is stored in the compiled code. 
*/ 219665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 219765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_XCLASS) code += GET(code, 1); 219865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 219965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Otherwise, we can get the item's length from the table, except that for 220065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeated character types, we have to test for \p and \P, which have an extra 220165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we 220265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich must add in its length. */ 220365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 220465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 220565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 220665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(c) 220765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 220865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPESTAR: 220965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINSTAR: 221065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPLUS: 221165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINPLUS: 221265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEQUERY: 221365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINQUERY: 221465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSSTAR: 221565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSPLUS: 221665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSQUERY: 221765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; 221865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 221965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
222065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSUPTO: 222165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEUPTO: 222265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINUPTO: 222365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEEXACT: 222465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) 222565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 2; 222665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 222765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 222865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MARK: 222965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PRUNE_ARG: 223065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SKIP_ARG: 223165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_THEN_ARG: 223265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += code[1]; 223365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 223465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 223565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 223665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Add in the fixed length from the table */ 223765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 223865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += PRIV(OP_lengths)[c]; 223965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 224065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In UTF-8 mode, opcodes that are followed by a character may be followed 224165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich by a multi-byte character. The length in the table is a minimum, so we have 224265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich to arrange to skip the extra bytes. 
*/ 224365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 224465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 224565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf) switch(c) 224665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 224765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHAR: 224865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHARI: 224965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT: 225065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTI: 225165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXACT: 225265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXACTI: 225365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTEXACT: 225465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTEXACTI: 225565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_UPTO: 225665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_UPTOI: 225765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTUPTO: 225865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTUPTOI: 225965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINUPTO: 226065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINUPTOI: 226165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINUPTO: 226265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINUPTOI: 226365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSUPTO: 226465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSUPTOI: 226565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSUPTO: 226665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSUPTOI: 226765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_STAR: 226865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_STARI: 
226965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTSTAR: 227065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTSTARI: 227165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINSTAR: 227265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINSTARI: 227365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINSTAR: 227465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINSTARI: 227565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSSTAR: 227665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSSTARI: 227765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSSTAR: 227865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSSTARI: 227965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUS: 228065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUSI: 228165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPLUS: 228265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPLUSI: 228365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINPLUS: 228465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINPLUSI: 228565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINPLUS: 228665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINPLUSI: 228765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSPLUS: 228865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSPLUSI: 228965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSPLUS: 229065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSPLUSI: 229165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_QUERY: 229265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_QUERYI: 229365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTQUERY: 229465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 
OP_NOTQUERYI: 229565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINQUERY: 229665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINQUERYI: 229765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINQUERY: 229865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINQUERYI: 229965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSQUERY: 230065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSQUERYI: 230165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSQUERY: 230265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSQUERYI: 230365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); 230465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 230565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 230665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else 230765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (void)(utf); /* Keep compiler happy by referencing function argument */ 230865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 230965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 231065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 231165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 231265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 231365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 231465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 231565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 231665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Scan compiled branch for non-emptiness * 231765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 231865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 231965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function scans through a branch of a 
compiled pattern to see whether it 232065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcan match the empty string or not. It is called from could_be_empty() 232165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbelow and from compile_branch() when checking for an unlimited repeat of a 232265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichgroup that can match nothing. Note that first_significant_code() skips over 232365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbackward and negative forward assertions when its final argument is TRUE. If we 232465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichhit an unclosed bracket, we return "empty" - this means we've struck an inner 232565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbracket whose current branch will already have been scanned. 232665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 232765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 232865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code points to start of search 232965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich endcode points to where to stop 233065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode 233165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd contains pointers to tables etc. 
233265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recurses chain of recurse_check to catch mutual recursion 233365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 233465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: TRUE if what is matched could be empty 233565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 233665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 233765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichtypedef struct recurse_check { 233865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich struct recurse_check *prev; 233965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *group; 234065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} recurse_check; 234165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 234265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL 234365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcould_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode, 234465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL utf, compile_data *cd, recurse_check *recurses) 234565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 234665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister pcre_uchar c; 234765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrecurse_check this_recurse; 234865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 234965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); 235065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code < endcode; 235165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE)) 235265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 235365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *ccode; 235465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 235565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *code; 
235665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 235765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Skip over forward assertions; the other assertions are skipped by 235865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich first_significant_code() with a TRUE final argument. */ 235965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 236065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_ASSERT) 236165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 236265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do code += GET(code, 1); while (*code == OP_ALT); 236365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *code; 236465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 236565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 236665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 236765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For a recursion/subroutine call, if its end has been reached, which 236865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich implies a backward reference subroutine call, we can scan it. If it's a 236965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich forward reference subroutine call, we can't. To detect forward reference 237065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich we have to scan up the list that is kept in the workspace. This function is 237165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich called only when doing the real compile, not during the pre-compile that 237265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich measures the size of the compiled pattern. 
*/ 237365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 237465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_RECURSE) 237565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 237665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *scode = cd->start_code + GET(code, 1); 237765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *endgroup = scode; 237865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL empty_branch; 237965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 238065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Test for forward reference or uncompleted reference. This is disabled 238165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich when called to scan a completed pattern by setting cd->start_workspace to 238265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NULL. */ 238365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 238465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cd->start_workspace != NULL) 238565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 238665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *tcode; 238765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE) 238865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE; 238965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (GET(scode, 1) == 0) return TRUE; /* Unclosed */ 239065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 239165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 239265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the reference is to a completed group, we need to detect whether this 239365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is a recursive call, as otherwise there will be an infinite loop. 
If it is 239465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich a recursion, just skip over it. Simple recursions are easily detected. For 239565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich mutual recursions we keep a chain on the stack. */ 239665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 239765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT); 239865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code >= scode && code <= endgroup) continue; /* Simple recursion */ 239965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 240065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 240165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recurse_check *r = recurses; 240265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (r = recurses; r != NULL; r = r->prev) 240365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (r->group == scode) break; 240465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (r != NULL) continue; /* Mutual recursion */ 240565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 240665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 240765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Completed reference; scan the referenced group, remembering it on the 240865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich stack chain to detect mutual recursions. 
*/ 240965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 241065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich empty_branch = FALSE; 241165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich this_recurse.prev = recurses; 241265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich this_recurse.group = scode; 241365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 241465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do 241565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 241665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse)) 241765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 241865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich empty_branch = TRUE; 241965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 242065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 242165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich scode += GET(scode, 1); 242265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 242365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*scode == OP_ALT); 242465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 242565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!empty_branch) return FALSE; /* All branches are non-empty */ 242665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 242765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 242865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 242965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Groups with zero repeats can of course be empty; skip them. 
*/ 243065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 243165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO || 243265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c == OP_BRAPOSZERO) 243365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 243465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += PRIV(OP_lengths)[c]; 243565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do code += GET(code, 1); while (*code == OP_ALT); 243665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *code; 243765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 243865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 243965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 244065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* A nested group that is already marked as "could be empty" can just be 244165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skipped. */ 244265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 244365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_SBRA || c == OP_SBRAPOS || 244465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c == OP_SCBRA || c == OP_SCBRAPOS) 244565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 244665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do code += GET(code, 1); while (*code == OP_ALT); 244765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *code; 244865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 244965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 245065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 245165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For other groups, scan the branches. 
*/ 245265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 245365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_BRA || c == OP_BRAPOS || 245465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c == OP_CBRA || c == OP_CBRAPOS || 245565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c == OP_ONCE || c == OP_ONCE_NC || 245665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c == OP_COND) 245765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 245865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL empty_branch; 245965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */ 246065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 246165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If a conditional group has only one branch, there is a second, implied, 246265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich empty branch, so just skip over the conditional, because it could be empty. 246365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Otherwise, scan the individual branches of the group. 
*/ 246465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 246565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_COND && code[GET(code, 1)] != OP_ALT) 246665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += GET(code, 1); 246765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 246865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 246965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich empty_branch = FALSE; 247065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do 247165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 247265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL)) 247365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich empty_branch = TRUE; 247465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += GET(code, 1); 247565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 247665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*code == OP_ALT); 247765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!empty_branch) return FALSE; /* All branches are non-empty */ 247865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 247965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 248065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *code; 248165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 248265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 248365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 248465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle the other opcodes */ 248565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 248665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch (c) 248765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 248865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check for quantifiers after a class. 
XCLASS is used for classes that 248965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cannot be represented just by a bit map. This includes negated single 249065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich high-valued characters. The length in PRIV(OP_lengths)[] is zero; the 249165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich actual length is stored in the compiled code, so we must update "code" 249265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich here. */ 249365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 249465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 249565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_XCLASS: 249665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ccode = code += GET(code, 1); 249765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto CHECK_CLASS_REPEAT; 249865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 249965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 250065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CLASS: 250165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NCLASS: 250265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ccode = code + PRIV(OP_lengths)[OP_CLASS]; 250365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 250465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 250565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHECK_CLASS_REPEAT: 250665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 250765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 250865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch (*ccode) 250965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 251065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRSTAR: /* These could be empty; continue */ 251165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINSTAR: 
251265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRQUERY: 251365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINQUERY: 251465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPOSSTAR: 251565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPOSQUERY: 251665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 251765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 251865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: /* Non-repeat => class must match */ 251965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPLUS: /* These repeats aren't empty */ 252065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINPLUS: 252165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPOSPLUS: 252265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 252365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 252465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRRANGE: 252565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINRANGE: 252665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPOSRANGE: 252765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */ 252865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 252965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 253065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 253165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 253265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Opcodes that must match a character */ 253365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 253465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ANY: 253565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ALLANY: 253665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ANYBYTE: 253765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
253865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PROP: 253965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPROP: 254065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ANYNL: 254165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 254265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_HSPACE: 254365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_HSPACE: 254465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_VSPACE: 254565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_VSPACE: 254665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXTUNI: 254765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 254865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_DIGIT: 254965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DIGIT: 255065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_WHITESPACE: 255165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_WHITESPACE: 255265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_WORDCHAR: 255365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_WORDCHAR: 255465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 255565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHAR: 255665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHARI: 255765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT: 255865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTI: 255965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 256065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUS: 256165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUSI: 256265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINPLUS: 256365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINPLUSI: 256465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
256565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPLUS: 256665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPLUSI: 256765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINPLUS: 256865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINPLUSI: 256965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 257065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSPLUS: 257165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSPLUSI: 257265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSPLUS: 257365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSPLUSI: 257465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 257565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXACT: 257665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXACTI: 257765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTEXACT: 257865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTEXACTI: 257965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 258065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPLUS: 258165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINPLUS: 258265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSPLUS: 258365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEEXACT: 258465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 258565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 258665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 258765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* These are going to continue, as they may be empty, but we have to 258865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich fudge the length for the \p and \P cases. 
*/ 258965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 259065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPESTAR: 259165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINSTAR: 259265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSSTAR: 259365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEQUERY: 259465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINQUERY: 259565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSQUERY: 259665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; 259765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 259865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 259965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Same for these */ 260065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 260165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEUPTO: 260265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINUPTO: 260365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSUPTO: 260465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) 260565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 2; 260665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 260765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 260865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* End of branch */ 260965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 261065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_KET: 261165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_KETRMAX: 261265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_KETRMIN: 261365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_KETRPOS: 261465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ALT: 
261565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return TRUE; 261665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 261765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO, 261865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich MINUPTO, and POSUPTO and their caseless and negative versions may be 261965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich followed by a multibyte character. */ 262065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 262165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 262265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_STAR: 262365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_STARI: 262465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTSTAR: 262565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTSTARI: 262665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 262765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINSTAR: 262865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINSTARI: 262965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINSTAR: 263065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINSTARI: 263165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 263265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSSTAR: 263365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSSTARI: 263465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSSTAR: 263565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSSTARI: 263665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 263765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_QUERY: 263865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_QUERYI: 263965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTQUERY: 
264065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTQUERYI: 264165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 264265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINQUERY: 264365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINQUERYI: 264465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINQUERY: 264565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINQUERYI: 264665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 264765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSQUERY: 264865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSQUERYI: 264965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSQUERY: 265065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSQUERYI: 265165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 265265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]); 265365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 265465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 265565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_UPTO: 265665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_UPTOI: 265765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTUPTO: 265865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTUPTOI: 265965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 266065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINUPTO: 266165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINUPTOI: 266265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINUPTO: 266365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINUPTOI: 266465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 266565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSUPTO: 266665de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich case OP_POSUPTOI: 266765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSUPTO: 266865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSUPTOI: 266965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 267065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]); 267165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 267265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 267365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 267465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* MARK, and PRUNE/SKIP/THEN with an argument must skip over the argument 267565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string. */ 267665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 267765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MARK: 267865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PRUNE_ARG: 267965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SKIP_ARG: 268065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_THEN_ARG: 268165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += code[1]; 268265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 268365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 268465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* None of the remaining opcodes are required to match a character. 
*/ 268565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 268665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 268765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 268865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 268965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 269065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 269165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn TRUE; 269265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 269365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 269465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 269565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 269665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 269765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Scan compiled regex for non-emptiness * 269865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 269965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 270065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called to check for left recursive calls. We want to check 270165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe current branch of the current pattern to see if it could match the empty 270265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstring. If it could, we must look outwards for branches at other levels, 270365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstopping when we pass beyond the bracket which is the subject of the recursion. 270465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThis function is called only during the real compile, not during the 270565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpre-compile. 
270665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 270765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 270865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code points to start of the recursion 270965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich endcode points to where to stop (current RECURSE item) 271065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bcptr points to the chain of current (unclosed) branch starts 271165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode 271265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd pointers to tables etc 271365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 271465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: TRUE if what is matched could be empty 271565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 271665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 271765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL 271865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcould_be_empty(const pcre_uchar *code, const pcre_uchar *endcode, 271965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branch_chain *bcptr, BOOL utf, compile_data *cd) 272065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 272165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (bcptr != NULL && bcptr->current_branch >= code) 272265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 272365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL)) 272465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 272565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bcptr = bcptr->outer; 272665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 272765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn TRUE; 272865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 272965de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich 273065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 273165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 273265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 273365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Base opcode of repeated opcodes * 273465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 273565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 273665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Returns the base opcode for repeated single character type opcodes. If the 273765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichopcode is not a repeated character type, it returns with the original value. 273865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 273965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: c opcode 274065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: base opcode for the type 274165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 274265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 274365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic pcre_uchar 274465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichget_repeat_base(pcre_uchar c) 274565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 274665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn (c > OP_TYPEPOSUPTO)? c : 274765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (c >= OP_TYPESTAR)? OP_TYPESTAR : 274865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (c >= OP_NOTSTARI)? OP_NOTSTARI : 274965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (c >= OP_NOTSTAR)? OP_NOTSTAR : 275065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (c >= OP_STARI)? 
OP_STARI : 275165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_STAR; 275265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 275365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 275465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 275565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 275665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 275765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 275865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Check a character and a property * 275965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 276065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 276165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called by check_auto_possessive() when a property item 276265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichis adjacent to a fixed character. 276365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 276465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 276565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c the character 276665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptype the property type 276765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pdata the data for the type 276865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich negated TRUE if it's a negated property (\P or \p{^) 276965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 277065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: TRUE if auto-possessifying is OK 277165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 277265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 277365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL 277465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcheck_char_prop(pcre_uint32 c, unsigned int ptype, unsigned int pdata, 
277565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL negated) 277665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 277765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uint32 *p; 277865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst ucd_record *prop = GET_UCD(c); 277965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 278065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichswitch(ptype) 278165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 278265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PT_LAMP: 278365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return (prop->chartype == ucp_Lu || 278465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich prop->chartype == ucp_Ll || 278565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich prop->chartype == ucp_Lt) == negated; 278665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 278765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PT_GC: 278865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated; 278965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 279065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PT_PC: 279165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return (pdata == prop->chartype) == negated; 279265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 279365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PT_SC: 279465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return (pdata == prop->script) == negated; 279565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 279665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* These are specials */ 279765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 279865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PT_ALNUM: 279965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || 
280065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated; 280165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 280265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Perl space used to exclude VT, but from Perl 5.18 it is included, which 280365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich means that Perl space and POSIX space are now identical. PCRE was changed 280465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich at release 8.34. */ 280565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 280665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PT_SPACE: /* Perl space */ 280765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PT_PXSPACE: /* POSIX space */ 280865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(c) 280965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 281065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich HSPACE_CASES: 281165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich VSPACE_CASES: 281265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return negated; 281365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 281465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 281565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated; 281665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 281765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; /* Control never reaches here */ 281865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 281965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PT_WORD: 282065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || 282165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PRIV(ucp_gentype)[prop->chartype] == ucp_N || 282265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c == CHAR_UNDERSCORE) == negated; 
282365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 282465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PT_CLIST: 282565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p = PRIV(ucd_caseless_sets) + prop->caseset; 282665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (;;) 282765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 282865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c < *p) return !negated; 282965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == *p++) return negated; 283065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 283165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; /* Control never reaches here */ 283265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 283365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 283465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn FALSE; 283565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 283665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif /* SUPPORT_UCP */ 283765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 283865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 283965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 284065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 284165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Fill the character property list * 284265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 284365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 284465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Checks whether the code points to an opcode that can take part in auto- 284565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpossessification, and if so, fills a list with its properties. 
284665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 284765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 284865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code points to start of expression 284965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich utf TRUE if in UTF-8 / UTF-16 / UTF-32 mode 285065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich fcc points to case-flipping table 285165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list points to output list 285265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[0] will be filled with the opcode 285365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[1] will be non-zero if this opcode 285465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich can match an empty character string 285565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[2..7] depends on the opcode 285665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 285765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: points to the start of the next opcode if *code is accepted 285865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NULL if *code is not accepted 285965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 286065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 286165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic const pcre_uchar * 286265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichget_chr_property_list(const pcre_uchar *code, BOOL utf, 286365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uint8 *fcc, pcre_uint32 *list) 286465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 286565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar c = *code; 286665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar base; 286765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *end; 286865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 chr; 
286965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 287065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 287165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 *clist_dest; 287265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uint32 *clist_src; 287365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else 287465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichutf = utf; /* Suppress "unused parameter" compiler warning */ 287565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 287665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 287765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlist[0] = c; 287865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlist[1] = FALSE; 287965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcode++; 288065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 288165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (c >= OP_STAR && c <= OP_TYPEPOSUPTO) 288265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 288365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich base = get_repeat_base(c); 288465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c -= (base - OP_STAR); 288565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 288665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO) 288765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += IMM2_SIZE; 288865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 288965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT && c != OP_POSPLUS); 289065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 289165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(base) 289265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 289365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_STAR: 289465de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich list[0] = OP_CHAR; 289565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 289665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 289765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_STARI: 289865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[0] = OP_CHARI; 289965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 290065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 290165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTSTAR: 290265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[0] = OP_NOT; 290365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 290465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 290565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTSTARI: 290665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[0] = OP_NOTI; 290765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 290865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 290965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPESTAR: 291065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[0] = *code; 291165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code++; 291265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 291365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 291465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = list[0]; 291565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 291665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 291765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichswitch(c) 291865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 291965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_DIGIT: 292065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DIGIT: 292165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_WHITESPACE: 292265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_WHITESPACE: 
292365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_WORDCHAR: 292465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_WORDCHAR: 292565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ANY: 292665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ALLANY: 292765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ANYNL: 292865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_HSPACE: 292965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_HSPACE: 293065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_VSPACE: 293165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_VSPACE: 293265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXTUNI: 293365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EODN: 293465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EOD: 293565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DOLL: 293665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DOLLM: 293765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return code; 293865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 293965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHAR: 294065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT: 294165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich GETCHARINCTEST(chr, code); 294265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[2] = chr; 294365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[3] = NOTACHAR; 294465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return code; 294565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 294665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHARI: 294765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTI: 294865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[0] = (c == OP_CHARI) ? 
OP_CHAR : OP_NOT; 294965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich GETCHARINCTEST(chr, code); 295065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[2] = chr; 295165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 295265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 295365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (chr < 128 || (chr < 256 && !utf)) 295465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[3] = fcc[chr]; 295565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 295665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[3] = UCD_OTHERCASE(chr); 295765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined SUPPORT_UTF || !defined COMPILE_PCRE8 295865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[3] = (chr < 256) ? fcc[chr] : chr; 295965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else 296065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[3] = fcc[chr]; 296165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 296265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 296365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* The othercase might be the same value. 
*/ 296465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 296565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (chr == list[3]) 296665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[3] = NOTACHAR; 296765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 296865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[4] = NOTACHAR; 296965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return code; 297065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 297165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 297265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PROP: 297365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPROP: 297465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code[0] != PT_CLIST) 297565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 297665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[2] = code[0]; 297765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[3] = code[1]; 297865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return code + 2; 297965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 298065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 298165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Convert only if we have enough space. 
*/ 298265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 298365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich clist_src = PRIV(ucd_caseless_sets) + code[1]; 298465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich clist_dest = list + 2; 298565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 2; 298665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 298765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do { 298865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (clist_dest >= list + 8) 298965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 299065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Early return if there is not enough space. This should never 299165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich happen, since all clists are shorter than 5 character now. */ 299265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[2] = code[0]; 299365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[3] = code[1]; 299465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return code; 299565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 299665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *clist_dest++ = *clist_src; 299765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 299865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while(*clist_src++ != NOTACHAR); 299965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 300065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* All characters are stored. The terminating NOTACHAR 300165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is copied form the clist itself. */ 300265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 300365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[0] = (c == OP_PROP) ? 
OP_CHAR : OP_NOT; 300465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return code; 300565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 300665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 300765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NCLASS: 300865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CLASS: 300965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 301065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_XCLASS: 301165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_XCLASS) 301265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich end = code + GET(code, 0) - 1; 301365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 301465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 301565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich end = code + 32 / sizeof(pcre_uchar); 301665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 301765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(*end) 301865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 301965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRSTAR: 302065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINSTAR: 302165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRQUERY: 302265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINQUERY: 302365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPOSSTAR: 302465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPOSQUERY: 302565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[1] = TRUE; 302665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich end++; 302765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 302865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 302965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPLUS: 303065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
case OP_CRMINPLUS: 303165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPOSPLUS: 303265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich end++; 303365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 303465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 303565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRRANGE: 303665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINRANGE: 303765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPOSRANGE: 303865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[1] = (GET2(end, 1) == 0); 303965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich end += 1 + 2 * IMM2_SIZE; 304065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 304165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 304265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[2] = (pcre_uint32)(end - code); 304365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return end; 304465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 304565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn NULL; /* Opcode not accepted */ 304665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 304765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 304865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 304965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 305065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 305165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Scan further character sets for match * 305265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 305365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 305465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Checks whether the base and the current opcode have a common character, in 305565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhich case the base cannot be 
possessified. 305665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 305765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 305865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code points to the byte code 305965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich utf TRUE in UTF-8 / UTF-16 / UTF-32 mode 306065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd static compile data 306165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich base_list the data list of the base opcode 306265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 306365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: TRUE if the auto-possessification is possible 306465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 306565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 306665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL 306765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcompare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd, 306865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uint32 *base_list, const pcre_uchar *base_end) 306965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 307065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar c; 307165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 list[8]; 307265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uint32 *chr_ptr; 307365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uint32 *ochr_ptr; 307465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uint32 *list_ptr; 307565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *next_code; 307665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 307765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *xclass_flags; 307865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 307965de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevichconst pcre_uint8 *class_bitset; 308065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uint8 *set1, *set2, *set_end; 308165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 chr; 308265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL accepted, invert_bits; 308365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL entered_a_group = FALSE; 308465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 308565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Note: the base_list[1] contains whether the current opcode has greedy 308665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich(represented by a non-zero value) quantifier. This is a different from 308765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichother character type lists, which stores here that the character iterator 308865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmatches to an empty string (also represented by a non-zero value). */ 308965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 309065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor(;;) 309165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 309265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* All operations move the code pointer forward. 309365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Therefore infinite recursions are not possible. 
*/ 309465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 309565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *code; 309665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 309765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Skip over callouts */ 309865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 309965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_CALLOUT) 310065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 310165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += PRIV(OP_lengths)[c]; 310265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 310365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 310465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 310565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_ALT) 310665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 310765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do code += GET(code, 1); while (*code == OP_ALT); 310865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *code; 310965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 311065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 311165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(c) 311265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 311365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_END: 311465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_KETRPOS: 311565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* TRUE only in greedy case. The non-greedy case could be replaced by 311665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich an OP_EXACT, but it is probably not worth it. (And note that OP_EXACT 311765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich uses more memory, which we cannot get at this stage.) 
*/ 311865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 311965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return base_list[1] != 0; 312065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 312165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_KET: 312265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the bracket is capturing, and referenced by an OP_RECURSE, or 312365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich it is an atomic sub-pattern (assert, once, etc.) the non-greedy case 312465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cannot be converted to a possessive form. */ 312565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 312665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (base_list[1] == 0) return FALSE; 312765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 312865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(*(code - GET(code, 1))) 312965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 313065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ASSERT: 313165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ASSERT_NOT: 313265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ASSERTBACK: 313365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ASSERTBACK_NOT: 313465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ONCE: 313565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ONCE_NC: 313665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Atomic sub-patterns and assertions can always auto-possessify their 313765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich last iterator. However, if the group was entered as a result of checking 313865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich a previous iterator, this is not possible. 
*/ 313965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 314065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return !entered_a_group; 314165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 314265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 314365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += PRIV(OP_lengths)[c]; 314465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 314565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 314665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ONCE: 314765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ONCE_NC: 314865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_BRA: 314965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CBRA: 315065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich next_code = code + GET(code, 1); 315165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += PRIV(OP_lengths)[c]; 315265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 315365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*next_code == OP_ALT) 315465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 315565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!compare_opcodes(code, utf, cd, base_list, base_end)) return FALSE; 315665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code = next_code + 1 + LINK_SIZE; 315765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich next_code += GET(next_code, 1); 315865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 315965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 316065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich entered_a_group = TRUE; 316165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 316265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 316365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_BRAZERO: 316465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_BRAMINZERO: 
316565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 316665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich next_code = code + 1; 316765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*next_code != OP_BRA && *next_code != OP_CBRA 316865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich && *next_code != OP_ONCE && *next_code != OP_ONCE_NC) return FALSE; 316965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 317065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do next_code += GET(next_code, 1); while (*next_code == OP_ALT); 317165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 317265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* The bracket content will be checked by the 317365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_BRA/OP_CBRA case above. */ 317465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich next_code += 1 + LINK_SIZE; 317565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!compare_opcodes(next_code, utf, cd, base_list, base_end)) 317665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 317765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 317865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += PRIV(OP_lengths)[c]; 317965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 318065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 318165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 318265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 318365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 318465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 318565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check for a supported opcode, and load its properties. 
*/ 318665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 318765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code = get_chr_property_list(code, utf, cd->fcc, list); 318865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code == NULL) return FALSE; /* Unsupported */ 318965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 319065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If either opcode is a small character list, set pointers for comparing 319165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich characters from that list with another list, or with a property. */ 319265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 319365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (base_list[0] == OP_CHAR) 319465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 319565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich chr_ptr = base_list + 2; 319665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list_ptr = list; 319765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 319865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (list[0] == OP_CHAR) 319965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 320065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich chr_ptr = list + 2; 320165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list_ptr = base_list; 320265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 320365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 320465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Character bitsets can also be compared to certain opcodes. */ 320565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 320665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (base_list[0] == OP_CLASS || list[0] == OP_CLASS 320765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE8 320865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. 
*/ 320965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich || (!utf && (base_list[0] == OP_NCLASS || list[0] == OP_NCLASS)) 321065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 321165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ) 321265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 321365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE8 321465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (base_list[0] == OP_CLASS || (!utf && base_list[0] == OP_NCLASS)) 321565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else 321665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (base_list[0] == OP_CLASS) 321765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 321865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 321965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich set1 = (pcre_uint8 *)(base_end - base_list[2]); 322065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list_ptr = list; 322165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 322265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 322365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 322465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich set1 = (pcre_uint8 *)(code - list[2]); 322565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list_ptr = base_list; 322665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 322765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 322865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich invert_bits = FALSE; 322965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(list_ptr[0]) 323065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 323165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CLASS: 323265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NCLASS: 323365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich set2 = (pcre_uint8 *) 323465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ((list_ptr == list ? 
code : base_end) - list_ptr[2]); 323565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 323665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 323765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 323865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_XCLASS: 323965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE; 324065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE; 324165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((*xclass_flags & XCL_MAP) == 0) 324265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 324365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* No bits are set for characters < 256. */ 324465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (list[1] == 0) return TRUE; 324565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Might be an empty repeat. 
*/ 324665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 324765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 324865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich set2 = (pcre_uint8 *)(xclass_flags + 1); 324965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 325065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 325165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 325265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_DIGIT: 325365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich invert_bits = TRUE; 325465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through */ 325565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DIGIT: 325665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich set2 = (pcre_uint8 *)(cd->cbits + cbit_digit); 325765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 325865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 325965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_WHITESPACE: 326065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich invert_bits = TRUE; 326165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through */ 326265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_WHITESPACE: 326365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich set2 = (pcre_uint8 *)(cd->cbits + cbit_space); 326465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 326565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 326665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_WORDCHAR: 326765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich invert_bits = TRUE; 326865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through */ 326965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_WORDCHAR: 327065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich set2 = (pcre_uint8 *)(cd->cbits + cbit_word); 327165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 
327265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 327365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 327465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 327565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 327665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 327765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Because the sets are unaligned, we need 327865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich to perform byte comparison here. */ 327965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich set_end = set1 + 32; 328065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (invert_bits) 328165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 328265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do 328365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 328465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((*set1++ & ~(*set2++)) != 0) return FALSE; 328565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 328665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (set1 < set_end); 328765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 328865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 328965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 329065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do 329165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 329265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((*set1++ & *set2++) != 0) return FALSE; 329365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 329465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (set1 < set_end); 329565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 329665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 329765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (list[1] == 0) return TRUE; 329865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Might be an empty repeat. 
*/ 329965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 330065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 330165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 330265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Some property combinations also acceptable. Unicode property opcodes are 330365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich processed specially; the rest can be handled with a lookup table. */ 330465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 330565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 330665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 330765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 leftop, rightop; 330865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 330965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich leftop = base_list[0]; 331065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich rightop = list[0]; 331165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 331265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 331365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich accepted = FALSE; /* Always set in non-unicode case. 
*/ 331465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (leftop == OP_PROP || leftop == OP_NOTPROP) 331565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 331665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (rightop == OP_EOD) 331765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich accepted = TRUE; 331865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (rightop == OP_PROP || rightop == OP_NOTPROP) 331965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 332065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int n; 332165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uint8 *p; 332265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL same = leftop == rightop; 332365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL lisprop = leftop == OP_PROP; 332465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL risprop = rightop == OP_PROP; 332565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL bothprop = lisprop && risprop; 332665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 332765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* There's a table that specifies how each combination is to be 332865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich processed: 332965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0 Always return FALSE (never auto-possessify) 333065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 1 Character groups are distinct (possessify if both are OP_PROP) 333165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 2 Check character categories in the same group (general or particular) 333265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 3 Return TRUE if the two opcodes are not the same 333365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ... 
see comments below 333465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich */ 333565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 333665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich n = propposstab[base_list[2]][list[2]]; 333765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(n) 333865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 333965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 0: break; 334065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 1: accepted = bothprop; break; 334165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 2: accepted = (base_list[3] == list[3]) != same; break; 334265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 3: accepted = !same; break; 334365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 334465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 4: /* Left general category, right particular category */ 334565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich accepted = risprop && catposstab[base_list[3]][list[3]] == same; 334665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 334765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 334865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 5: /* Right general category, left particular category */ 334965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich accepted = lisprop && catposstab[list[3]][base_list[3]] == same; 335065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 335165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 335265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* This code is logically tricky. Think hard before fiddling with it. 335365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich The posspropstab table has four entries per row. Each row relates to 335465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich one of PCRE's special properties such as ALNUM or SPACE or WORD. 
335565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Only WORD actually needs all four entries, but using repeats for the 335665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich others means they can all use the same code below. 335765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 335865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich The first two entries in each row are Unicode general categories, and 335965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich apply always, because all the characters they include are part of the 336065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PCRE character set. The third and fourth entries are a general and a 336165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich particular category, respectively, that include one or more relevant 336265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich characters. One or the other is used, depending on whether the check 336365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is for a general or a particular category. However, in both cases the 336465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich category contains more characters than the specials that are defined 336565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for the property being tested against. Therefore, it cannot be used 336665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich in a NOTPROP case. 336765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 336865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po. 336965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Underscore is covered by ucp_P or ucp_Po. 
*/ 337065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 337165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 6: /* Left alphanum vs right general category */ 337265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 7: /* Left space vs right general category */ 337365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 8: /* Left word vs right general category */ 337465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p = posspropstab[n-6]; 337565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich accepted = risprop && lisprop == 337665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (list[3] != p[0] && 337765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[3] != p[1] && 337865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (list[3] != p[2] || !lisprop)); 337965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 338065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 338165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 9: /* Right alphanum vs left general category */ 338265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 10: /* Right space vs left general category */ 338365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 11: /* Right word vs left general category */ 338465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p = posspropstab[n-9]; 338565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich accepted = lisprop && risprop == 338665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (base_list[3] != p[0] && 338765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich base_list[3] != p[1] && 338865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (base_list[3] != p[2] || !risprop)); 338965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 339065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 339165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 12: /* Left alphanum vs right particular category */ 
339265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 13: /* Left space vs right particular category */ 339365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 14: /* Left word vs right particular category */ 339465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p = posspropstab[n-12]; 339565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich accepted = risprop && lisprop == 339665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (catposstab[p[0]][list[3]] && 339765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich catposstab[p[1]][list[3]] && 339865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (list[3] != p[3] || !lisprop)); 339965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 340065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 340165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 15: /* Right alphanum vs left particular category */ 340265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 16: /* Right space vs left particular category */ 340365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 17: /* Right word vs left particular category */ 340465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p = posspropstab[n-15]; 340565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich accepted = lisprop && risprop == 340665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (catposstab[p[0]][base_list[3]] && 340765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich catposstab[p[1]][base_list[3]] && 340865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (base_list[3] != p[3] || !risprop)); 340965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 341065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 341165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 341265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 341365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 341465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 
341565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif /* SUPPORT_UCP */ 341665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 341765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP && 341865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP && 341965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP]; 342065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 342165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!accepted) return FALSE; 342265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 342365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (list[1] == 0) return TRUE; 342465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Might be an empty repeat. */ 342565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 342665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 342765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 342865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Control reaches here only if one of the items is a small character list. 342965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich All characters are checked against the other side. 
*/ 343065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 343165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do 343265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 343365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich chr = *chr_ptr; 343465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 343565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(list_ptr[0]) 343665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 343765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHAR: 343865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ochr_ptr = list_ptr + 2; 343965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do 344065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 344165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (chr == *ochr_ptr) return FALSE; 344265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ochr_ptr++; 344365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 344465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while(*ochr_ptr != NOTACHAR); 344565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 344665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 344765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT: 344865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ochr_ptr = list_ptr + 2; 344965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do 345065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 345165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (chr == *ochr_ptr) 345265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 345365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ochr_ptr++; 345465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 345565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while(*ochr_ptr != NOTACHAR); 345665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ochr_ptr == NOTACHAR) return FALSE; /* Not found */ 345765de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich break; 345865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 345965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* 346065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ 346165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 346265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DIGIT: 346365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (chr < 256 && (cd->ctypes[chr] & ctype_digit) != 0) return FALSE; 346465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 346565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 346665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_DIGIT: 346765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (chr > 255 || (cd->ctypes[chr] & ctype_digit) == 0) return FALSE; 346865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 346965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 347065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_WHITESPACE: 347165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (chr < 256 && (cd->ctypes[chr] & ctype_space) != 0) return FALSE; 347265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 347365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 347465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_WHITESPACE: 347565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (chr > 255 || (cd->ctypes[chr] & ctype_space) == 0) return FALSE; 347665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 347765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 347865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_WORDCHAR: 347965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (chr < 255 && (cd->ctypes[chr] & ctype_word) != 0) return FALSE; 348065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 
348165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 348265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_WORDCHAR: 348365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (chr > 255 || (cd->ctypes[chr] & ctype_word) == 0) return FALSE; 348465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 348565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 348665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_HSPACE: 348765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(chr) 348865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 348965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich HSPACE_CASES: return FALSE; 349065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: break; 349165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 349265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 349365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 349465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_HSPACE: 349565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(chr) 349665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 349765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich HSPACE_CASES: break; 349865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: return FALSE; 349965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 350065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 350165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 350265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ANYNL: 350365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_VSPACE: 350465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(chr) 350565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 350665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich VSPACE_CASES: return FALSE; 350765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: break; 350865de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich } 350965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 351065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 351165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT_VSPACE: 351265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(chr) 351365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 351465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich VSPACE_CASES: break; 351565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: return FALSE; 351665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 351765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 351865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 351965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DOLL: 352065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EODN: 352165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch (chr) 352265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 352365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_CR: 352465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_LF: 352565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_VT: 352665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_FF: 352765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_NEL: 352865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef EBCDIC 352965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 0x2028: 353065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 0x2029: 353165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif /* Not EBCDIC */ 353265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 353365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 353465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 353565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 353665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EOD: /* Can always possessify 
before \z */ 353765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 353865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 353965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 354065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PROP: 354165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPROP: 354265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!check_char_prop(chr, list_ptr[2], list_ptr[3], 354365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list_ptr[0] == OP_NOTPROP)) 354465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 354565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 354665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 354765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 354865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NCLASS: 354965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (chr > 255) return FALSE; 355065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through */ 355165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 355265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CLASS: 355365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (chr > 255) break; 355465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_bitset = (pcre_uint8 *) 355565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ((list_ptr == list ? 
code : base_end) - list_ptr[2]); 355665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((class_bitset[chr >> 3] & (1 << (chr & 7))) != 0) return FALSE; 355765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 355865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 355965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 356065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_XCLASS: 356165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) - 356265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list_ptr[2] + LINK_SIZE, utf)) return FALSE; 356365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 356465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 356565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 356665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 356765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 356865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 356965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 357065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich chr_ptr++; 357165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 357265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while(*chr_ptr != NOTACHAR); 357365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 357465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* At least one character must be matched from this opcode. */ 357565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 357665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (list[1] == 0) return TRUE; 357765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 357865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 357965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Control never reaches here. 
There used to be a fail-safe return FALSE; here, 358065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbut some compilers complain about an unreachable statement. */ 358165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 358265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 358365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 358465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 358565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 358665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 358765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Scan compiled regex for auto-possession * 358865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 358965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 359065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Replaces single character iterations with their possessive alternatives 359165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif appropriate. This function modifies the compiled opcode! 
359265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 359365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 359465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code points to start of the byte code 359565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich utf TRUE in UTF-8 / UTF-16 / UTF-32 mode 359665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd static compile data 359765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 359865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: nothing 359965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 360065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 360165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic void 360265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichauto_possessify(pcre_uchar *code, BOOL utf, const compile_data *cd) 360365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 360465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister pcre_uchar c; 360565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *end; 360665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *repeat_opcode; 360765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 list[8]; 360865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 360965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;;) 361065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 361165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *code; 361265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 361365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c >= OP_STAR && c <= OP_TYPEPOSUPTO) 361465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 361565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c -= get_repeat_base(c) - OP_STAR; 361665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich end = (c <= OP_MINUPTO) ? 
361765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich get_chr_property_list(code, utf, cd->fcc, list) : NULL; 361865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO; 361965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 362065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (end != NULL && compare_opcodes(end, utf, cd, list, end)) 362165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 362265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(c) 362365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 362465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_STAR: 362565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code += OP_POSSTAR - OP_STAR; 362665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 362765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 362865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINSTAR: 362965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code += OP_POSSTAR - OP_MINSTAR; 363065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 363165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 363265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUS: 363365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code += OP_POSPLUS - OP_PLUS; 363465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 363565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 363665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINPLUS: 363765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code += OP_POSPLUS - OP_MINPLUS; 363865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 363965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 364065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_QUERY: 364165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code += OP_POSQUERY - OP_QUERY; 364265de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich break; 364365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 364465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINQUERY: 364565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code += OP_POSQUERY - OP_MINQUERY; 364665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 364765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 364865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_UPTO: 364965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code += OP_POSUPTO - OP_UPTO; 365065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 365165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 365265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINUPTO: 365365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code += OP_POSUPTO - OP_MINUPTO; 365465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 365565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 365665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 365765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *code; 365865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 365965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS) 366065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 366165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 366265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == OP_XCLASS) 366365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat_opcode = code + GET(code, 1); 366465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 366565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 366665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat_opcode = code + 1 + (32 / sizeof(pcre_uchar)); 366765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 366865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *repeat_opcode; 
366965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c >= OP_CRSTAR && c <= OP_CRMINRANGE) 367065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 367165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* end must not be NULL. */ 367265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich end = get_chr_property_list(code, utf, cd->fcc, list); 367365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 367465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich list[1] = (c & 1) == 0; 367565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 367665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (compare_opcodes(end, utf, cd, list, end)) 367765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 367865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch (c) 367965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 368065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRSTAR: 368165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINSTAR: 368265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *repeat_opcode = OP_CRPOSSTAR; 368365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 368465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 368565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPLUS: 368665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINPLUS: 368765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *repeat_opcode = OP_CRPOSPLUS; 368865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 368965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 369065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRQUERY: 369165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINQUERY: 369265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *repeat_opcode = OP_CRPOSQUERY; 369365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 369465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
369565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRRANGE: 369665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRMINRANGE: 369765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *repeat_opcode = OP_CRPOSRANGE; 369865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 369965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 370065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 370165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 370265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *code; 370365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 370465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 370565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(c) 370665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 370765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_END: 370865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return; 370965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 371065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPESTAR: 371165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINSTAR: 371265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPLUS: 371365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINPLUS: 371465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEQUERY: 371565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINQUERY: 371665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSSTAR: 371765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSPLUS: 371865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSQUERY: 371965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; 372065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 372165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
372265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEUPTO: 372365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEMINUPTO: 372465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEEXACT: 372565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPOSUPTO: 372665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) 372765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 2; 372865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 372965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 373065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 373165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_XCLASS: 373265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += GET(code, 1); 373365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 373465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 373565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 373665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MARK: 373765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PRUNE_ARG: 373865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SKIP_ARG: 373965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_THEN_ARG: 374065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += code[1]; 374165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 374265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 374365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 374465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Add in the fixed length from the table */ 374565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 374665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += PRIV(OP_lengths)[c]; 374765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
374865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In UTF-8 mode, opcodes that are followed by a character may be followed by 374965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich a multi-byte character. The length in the table is a minimum, so we have to 375065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich arrange to skip the extra bytes. */ 375165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 375265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 375365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf) switch(c) 375465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 375565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHAR: 375665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHARI: 375765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT: 375865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTI: 375965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_STAR: 376065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINSTAR: 376165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUS: 376265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINPLUS: 376365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_QUERY: 376465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINQUERY: 376565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_UPTO: 376665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINUPTO: 376765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXACT: 376865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSSTAR: 376965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSPLUS: 377065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSQUERY: 377165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSUPTO: 377265de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich case OP_STARI: 377365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINSTARI: 377465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUSI: 377565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINPLUSI: 377665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_QUERYI: 377765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINQUERYI: 377865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_UPTOI: 377965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINUPTOI: 378065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXACTI: 378165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSSTARI: 378265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSPLUSI: 378365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSQUERYI: 378465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSUPTOI: 378565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTSTAR: 378665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINSTAR: 378765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPLUS: 378865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINPLUS: 378965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTQUERY: 379065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINQUERY: 379165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTUPTO: 379265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINUPTO: 379365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTEXACT: 379465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSSTAR: 379565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSPLUS: 379665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSQUERY: 379765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSUPTO: 
379865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTSTARI: 379965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINSTARI: 380065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPLUSI: 380165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINPLUSI: 380265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTQUERYI: 380365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINQUERYI: 380465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTUPTOI: 380565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTMINUPTOI: 380665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTEXACTI: 380765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSSTARI: 380865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSPLUSI: 380965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSQUERYI: 381065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPOSUPTOI: 381165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); 381265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 381365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 381465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else 381565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (void)(utf); /* Keep compiler happy by referencing function argument */ 381665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 381765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 381865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 381965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 382065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 382165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 382265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 382365de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich* Check for POSIX class syntax * 382465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 382565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 382665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called when the sequence "[:" or "[." or "[=" is 382765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichencountered in a character class. It checks whether this is followed by a 382865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsequence of characters terminated by a matching ":]" or ".]" or "=]". If we 382965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreach an unescaped ']' without the special preceding character, return FALSE. 383065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 383165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichOriginally, this function only recognized a sequence of letters between the 383265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichterminators, but it seems that Perl recognizes any sequence of characters, 383365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthough of course unknown POSIX names are subsequently rejected. Perl gives an 383465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE 383565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdidn't consider this to be a POSIX class. Likewise for [:1234:]. 383665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 383765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThe problem in trying to be exactly like Perl is in the handling of escapes. We 383865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichhave to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX 383965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichclass, but [abc[:x\]pqr:]] is (so that an error can be generated). 
The code 384065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbelow handles the special case of \], but does not try to do any other escape 384165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichprocessing. This makes it different from Perl for cases such as [:l\ower:] 384265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhere Perl recognizes it as the POSIX class "lower" but PCRE does not recognize 384365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich"l\ower". This is a lesser evil than not diagnosing bad classes when Perl does, 384465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichI think. 384565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 384665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichA user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not. 384765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichIt seems that the appearance of a nested POSIX class supersedes an apparent 384865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichexternal class. For example, [:a[:digit:]b:] matches "a", "b", ":", or 384965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevicha digit. 385065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 385165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichIn Perl, unescaped square brackets may also appear as part of class names. For 385265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichexample, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". However, for 385365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich[:a[:abc]b][b:] it gives unknown POSIX class "[:abc]b][b:]", which does not 385465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichseem right at all. PCRE does not allow closing square brackets in POSIX class 385565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnames. 
385665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 385765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 385865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr pointer to the initial [ 385965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich endptr where to return the end pointer 386065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 386165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: TRUE or FALSE 386265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 386365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 386465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL 386565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcheck_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr) 386665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 386765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar terminator; /* Don't combine these lines; the Solaris cc */ 386865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichterminator = *(++ptr); /* compiler warns about "non-constant" initializer. 
*/ 386965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (++ptr; *ptr != CHAR_NULL; ptr++) 387065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 387165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) 387265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 387365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE; 387465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 387565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 387665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) 387765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 387865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *endptr = ptr; 387965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return TRUE; 388065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 388165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr == CHAR_LEFT_SQUARE_BRACKET && 388265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || 388365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr[1] == CHAR_EQUALS_SIGN) && 388465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich check_posix_syntax(ptr, endptr)) 388565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 388665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 388765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 388865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn FALSE; 388965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 389065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 389165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 389265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 389365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 389465de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich/************************************************* 389565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Check POSIX class name * 389665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 389765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 389865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called to check the name given in a POSIX-style class entry 389965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsuch as [:alnum:]. 390065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 390165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 390265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr points to the first letter 390365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich len the length of the name 390465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 390565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: a value representing the name, or -1 if unknown 390665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 390765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 390865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int 390965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcheck_posix_name(const pcre_uchar *ptr, int len) 391065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 391165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst char *pn = posix_names; 391265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister int yield = 0; 391365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (posix_name_lengths[yield] != 0) 391465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 391565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (len == posix_name_lengths[yield] && 391665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRNCMP_UC_C8(ptr, pn, (unsigned int)len) == 0) return yield; 391765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pn += 
posix_name_lengths[yield] + 1; 391865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich yield++; 391965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 392065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn -1; 392165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 392265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 392365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 392465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 392565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Adjust OP_RECURSE items in repeated group * 392665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 392765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 392865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* OP_RECURSE items contain an offset from the start of the regex to the group 392965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthat is referenced. This means that groups can be replicated for fixed 393065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrepetition simply by copying (because the recursion is allowed to refer to 393165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichearlier groups that are outside the current group). However, when a group is 393265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichoptional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO is 393365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichinserted before it, after it has been compiled. This means that any OP_RECURSE 393465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichitems within it that refer to the group itself or any contained groups have to 393565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichhave their offsets adjusted. That one of the jobs of this function. 
Before it 393665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichis called, the partially compiled regex must be temporarily terminated with 393765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichOP_END. 393865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 393965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThis function has been extended with the possibility of forward references for 394065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrecursions and subroutine calls. It must also check the list of such references 394165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor the group we are dealing with. If it finds that one of the recursions in 394265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe current group is on this list, it adjusts the offset in the list, not the 394365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvalue in the reference (which is a group number). 394465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 394565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 394665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich group points to the start of the group 394765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich adjust the amount by which the group is to be moved 394865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich utf TRUE in UTF-8 / UTF-16 / UTF-32 mode 394965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd contains pointers to tables etc. 
395065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich save_hwm the hwm forward reference pointer at the start of the group 395165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 395265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: nothing 395365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 395465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 395565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic void 395665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichadjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd, 395765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *save_hwm) 395865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 395965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *ptr = group; 396065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 396165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL) 396265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 396365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int offset; 396465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *hc; 396565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 396665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* See if this recursion is on the forward reference list. If so, adjust the 396765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reference. 
*/ 396865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 396965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE) 397065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 397165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich offset = (int)GET(hc, 0); 397265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cd->start_code + offset == ptr + 1) 397365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 397465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(hc, 0, offset + adjust); 397565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 397665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 397765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 397865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 397965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Otherwise, adjust the recursion offset if it's after the start of this 398065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich group. 
*/ 398165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 398265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (hc >= cd->hwm) 398365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 398465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich offset = (int)GET(ptr, 1); 398565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust); 398665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 398765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 398865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr += 1 + LINK_SIZE; 398965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 399065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 399165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 399265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 399365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 399465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 399565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Insert an automatic callout point * 399665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 399765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 399865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called when the PCRE_AUTO_CALLOUT option is set, to insert 399965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcallout points before each pattern item. 
400065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 400165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 400265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code current code pointer 400365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr current pattern pointer 400465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd pointers to tables etc 400565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 400665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: new code pointer 400765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 400865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 400965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic pcre_uchar * 401065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichauto_callout(pcre_uchar *code, const pcre_uchar *ptr, compile_data *cd) 401165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 401265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*code++ = OP_CALLOUT; 401365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*code++ = 255; 401465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPUT(code, 0, (int)(ptr - cd->start_pattern)); /* Pattern offset */ 401565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPUT(code, LINK_SIZE, 0); /* Default length */ 401665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn code + 2 * LINK_SIZE; 401765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 401865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 401965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 402065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 402165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 402265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Complete a callout item * 402365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 402465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
402565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* A callout item contains the length of the next item in the pattern, which 402665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwe can't fill in till after we have reached the relevant point. This is used 402765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor both automatic and manual callouts. 402865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 402965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 403065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous_callout points to previous callout item 403165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr current pattern pointer 403265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd pointers to tables etc 403365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 403465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: nothing 403565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 403665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 403765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic void 403865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcomplete_callout(pcre_uchar *previous_callout, const pcre_uchar *ptr, compile_data *cd) 403965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 404065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2)); 404165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPUT(previous_callout, 2 + LINK_SIZE, length); 404265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 404365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 404465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 404565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 404665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 404765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 
404865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Get othercase range * 404965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 405065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 405165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is passed the start and end of a class range, in UTF-8 mode 405265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwith UCP support. It searches up the characters, looking for ranges of 405365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcharacters in the "other" case. Each call returns the next one, updating the 405465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstart address. A character with multiple other cases is returned on its own 405565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwith a special return value. 405665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 405765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 405865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cptr points to starting character value; updated 405965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich d end value 406065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ocptr where to put start of othercase range 406165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich odptr where to put end of othercase range 406265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 406365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichYield: -1 when no more 406465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 0 when a range is returned 406565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich >0 the CASESET offset for char with multiple other cases 406665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich in this case, ocptr contains the original 406765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 406865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 406965de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevichstatic int 407065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichget_othercase_range(pcre_uint32 *cptr, pcre_uint32 d, pcre_uint32 *ocptr, 407165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 *odptr) 407265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 407365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 c, othercase, next; 407465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichunsigned int co; 407565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 407665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Find the first character that has an other case. If it has multiple other 407765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcases, return its case offset value. */ 407865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 407965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (c = *cptr; c <= d; c++) 408065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 408165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((co = UCD_CASESET(c)) != 0) 408265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 408365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *ocptr = c++; /* Character that has the set */ 408465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *cptr = c; /* Rest of input range */ 408565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return (int)co; 408665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 408765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((othercase = UCD_OTHERCASE(c)) != c) break; 408865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 408965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 409065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (c > d) return -1; /* Reached end of range */ 409165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 409265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Found a character that has a single other case. 
Search for the end of the 409365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrange, which is either the end of the input range, or a character that has zero 409465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichor more than one other cases. */ 409565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 409665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*ocptr = othercase; 409765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnext = othercase + 1; 409865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 409965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (++c; c <= d; c++) 410065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 410165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break; 410265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich next++; 410365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 410465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 410565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*odptr = next - 1; /* End of othercase range */ 410665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*cptr = c; /* Rest of input range */ 410765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn 0; 410865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 410965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif /* SUPPORT_UCP */ 411065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 411165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 411265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 411365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 411465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Add a character or range to a class * 411565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 411665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 411765de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich/* This function packages up the logic of adding a character or range of 411865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcharacters to a class. The character values in the arguments will be within the 411965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvalid values for the current mode (8-bit, 16-bit, UTF, etc). This function is 412065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmutually recursive with the function immediately below. 412165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 412265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 412365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich classbits the bit map for characters < 256 412465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich uchardptr points to the pointer for extra data 412565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich options the options word 412665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd contains pointers to tables etc. 412765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich start start of range character 412865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich end end of range character 412965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 413065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: the number of < 256 characters added 413165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the pointer to extra data is updated 413265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 413365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 413465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int 413565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichadd_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options, 413665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich compile_data *cd, pcre_uint32 start, pcre_uint32 end) 413765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 413865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 c; 
413965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 classbits_end = (end <= 0xff ? end : 0xff); 414065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint n8 = 0; 414165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 414265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If caseless matching is required, scan the range and process alternate 414365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcases. In Unicode, there are 8-bit characters that have alternate cases that 414465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichare greater than 255 and vice-versa. Sometimes we can just extend the original 414565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrange. */ 414665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 414765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((options & PCRE_CASELESS) != 0) 414865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 414965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 415065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_UTF8) != 0) 415165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 415265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int rc; 415365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 oc, od; 415465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 415565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich options &= ~PCRE_CASELESS; /* Remove for recursive calls */ 415665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = start; 415765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 415865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0) 415965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 416065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle a single character that has more than one other case. 
*/ 416165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 416265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cd, 416365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PRIV(ucd_caseless_sets) + rc, oc); 416465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 416565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Do nothing if the other case range is within the original range. */ 416665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 416765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (oc >= start && od <= end) continue; 416865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 416965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Extend the original range if there is overlap, noting that if oc < c, we 417065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich can't have od > end because a subrange is always shorter than the basic 417165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich range. Otherwise, use a recursive call to add the additional range. 
*/ 417265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 417365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */ 417465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (od > end && oc <= end + 1) end = od; /* Extend upwards */ 417565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else n8 += add_to_class(classbits, uchardptr, options, cd, oc, od); 417665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 417765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 417865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 417965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif /* SUPPORT_UCP */ 418065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 418165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Not UTF-mode, or no UCP */ 418265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 418365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (c = start; c <= classbits_end; c++) 418465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 418565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich SETBIT(classbits, cd->fcc[c]); 418665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich n8++; 418765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 418865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 418965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 419065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Now handle the original range. Adjust the final value according to the bit 419165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlength - this means that the same lists of (e.g.) horizontal spaces can be used 419265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichin all cases. 
*/ 419365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 419465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8 419565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF 419665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_UTF8) == 0) 419765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 419865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (end > 0xff) end = 0xff; 419965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 420065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16 420165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF 420265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_UTF16) == 0) 420365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 420465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (end > 0xffff) end = 0xffff; 420565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 420665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif /* COMPILE_PCRE[8|16] */ 420765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 420865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Use the bitmap for characters < 256. Otherwise use extra data.*/ 420965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 421065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (c = start; c <= classbits_end; c++) 421165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 421265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Regardless of start, c will always be <= 255. 
*/ 421365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich SETBIT(classbits, c); 421465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich n8++; 421565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 421665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 421765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 421865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (start <= 0xff) start = 0xff + 1; 421965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 422065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (end >= start) 422165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 422265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *uchardata = *uchardptr; 422365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF 422465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_UTF8) != 0) /* All UTFs use the same flag bit */ 422565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 422665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (start < end) 422765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 422865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *uchardata++ = XCL_RANGE; 422965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich uchardata += PRIV(ord2utf)(start, uchardata); 423065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich uchardata += PRIV(ord2utf)(end, uchardata); 423165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 423265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (start == end) 423365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 423465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *uchardata++ = XCL_SINGLE; 423565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich uchardata += PRIV(ord2utf)(start, uchardata); 423665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 423765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 
423865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 423965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif /* SUPPORT_UTF */ 424065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 424165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Without UTF support, character values are constrained by the bit length, 424265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich and can only be > 256 for 16-bit and 32-bit libraries. */ 424365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 424465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE8 424565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich {} 424665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else 424765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (start < end) 424865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 424965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *uchardata++ = XCL_RANGE; 425065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *uchardata++ = start; 425165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *uchardata++ = end; 425265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 425365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (start == end) 425465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 425565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *uchardata++ = XCL_SINGLE; 425665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *uchardata++ = start; 425765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 425865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 425965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 426065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *uchardptr = uchardata; /* Updata extra data pointer */ 426165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 426265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ 426365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
426465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn n8; /* Number of 8-bit characters */ 426565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 426665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 426765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 426865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 426965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 427065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 427165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Add a list of characters to a class * 427265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 427365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 427465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is used for adding a list of case-equivalent characters to a 427565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichclass, and also for adding a list of horizontal or vertical whitespace. If the 427665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlist is in order (which it should be), ranges of characters are detected and 427765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichhandled appropriately. This function is mutually recursive with the function 427865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichabove. 427965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 428065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 428165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich classbits the bit map for characters < 256 428265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich uchardptr points to the pointer for extra data 428365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich options the options word 428465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd contains pointers to tables etc. 
428565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p points to row of 32-bit values, terminated by NOTACHAR 428665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich except character to omit; this is used when adding lists of 428765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case-equivalent characters to avoid including the one we 428865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich already know about 428965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 429065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: the number of < 256 characters added 429165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the pointer to extra data is updated 429265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 429365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 429465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int 429565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichadd_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, int options, 429665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich compile_data *cd, const pcre_uint32 *p, unsigned int except) 429765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 429865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint n8 = 0; 429965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (p[0] < NOTACHAR) 430065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 430165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int n = 0; 430265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (p[0] != except) 430365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 430465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while(p[n+1] == p[0] + n + 1) n++; 430565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich n8 += add_to_class(classbits, uchardptr, options, cd, p[0], p[n]); 430665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 430765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p += n + 1; 
430865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 430965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn n8; 431065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 431165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 431265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 431365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 431465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 431565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Add characters not in a list to a class * 431665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 431765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 431865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is used for adding the complement of a list of horizontal or 431965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvertical whitespace to a class. The list must be in order. 432065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 432165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 432265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich classbits the bit map for characters < 256 432365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich uchardptr points to the pointer for extra data 432465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich options the options word 432565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd contains pointers to tables etc. 
432665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p points to row of 32-bit values, terminated by NOTACHAR 432765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 432865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: the number of < 256 characters added 432965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the pointer to extra data is updated 433065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 433165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 433265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic int 433365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichadd_not_list_to_class(pcre_uint8 *classbits, pcre_uchar **uchardptr, 433465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int options, compile_data *cd, const pcre_uint32 *p) 433565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 433665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL utf = (options & PCRE_UTF8) != 0; 433765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint n8 = 0; 433865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (p[0] > 0) 433965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich n8 += add_to_class(classbits, uchardptr, options, cd, 0, p[0] - 1); 434065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (p[0] < NOTACHAR) 434165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 434265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (p[1] == p[0] + 1) p++; 434365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich n8 += add_to_class(classbits, uchardptr, options, cd, p[0] + 1, 434465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (p[1] == NOTACHAR) ? (utf ? 
0x10ffffu : 0xffffffffu) : p[1] - 1); 434565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p++; 434665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 434765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn n8; 434865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 434965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 435065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 435165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 435265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 435365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Compile one branch * 435465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 435565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 435665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Scan the pattern, compiling it into the a vector. If the options are 435765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichchanged during the branch, the pointer is used to change the external options 435865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbits. This function is used during the pre-compile phase when we are trying 435965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichto find out the amount of memory needed, as well as during the real compile 436065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichphase. The value of lengthptr distinguishes the two phases. 
436165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 436265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 436365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich optionsptr pointer to the option bits 436465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich codeptr points to the pointer to the current code point 436565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptrptr points to the current pattern pointer 436665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcodeptr points to error code variable 436765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstcharptr place to put the first required character 436865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstcharflagsptr place to put the first character flags, or a negative number 436965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharptr place to put the last required character 437065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharflagsptr place to put the last required character flags, or a negative number 437165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bcptr points to current branch chain 437265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cond_depth conditional nesting depth 437365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd contains pointers to tables etc. 
437465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich lengthptr NULL during the real compile phase 437565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich points to length accumulator during pre-compile phase 437665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 437765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: TRUE on success 437865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich FALSE, with *errorcodeptr set non-zero on error 437965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 438065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 438165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL 438265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcompile_branch(int *optionsptr, pcre_uchar **codeptr, 438365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar **ptrptr, int *errorcodeptr, 438465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr, 438565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr, 438665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branch_chain *bcptr, int cond_depth, 438765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich compile_data *cd, int *lengthptr) 438865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 438965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint repeat_type, op_type; 439065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint repeat_min = 0, repeat_max = 0; /* To please picky compilers */ 439165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint bravalue = 0; 439265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint greedy_default, greedy_non_default; 439365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 firstchar, reqchar; 439465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_int32 firstcharflags, reqcharflags; 439565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 
zeroreqchar, zerofirstchar; 439665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_int32 zeroreqcharflags, zerofirstcharflags; 439765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_int32 req_caseopt, reqvary, tempreqvary; 439865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint options = *optionsptr; /* May change dynamically */ 439965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint after_manual_callout = 0; 440065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint length_prevgroup = 0; 440165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister pcre_uint32 c; 440265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint escape; 440365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister pcre_uchar *code = *codeptr; 440465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *last_code = code; 440565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *orig_code = code; 440665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *tempcode; 440765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL inescq = FALSE; 440865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL groupsetfirstchar = FALSE; 440965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *ptr = *ptrptr; 441065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *tempptr; 441165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *nestptr = NULL; 441265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *previous = NULL; 441365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *previous_callout = NULL; 441465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *save_hwm = NULL; 441565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint8 classbits[32]; 441665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 441765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* We can fish out the UTF-8 setting once and for all into a 
BOOL, but we 441865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmust not do this for other options (e.g. PCRE_EXTENDED) because they may change 441965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdynamically as we process the pattern. */ 442065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 442165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF 442265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ 442365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL utf = (options & PCRE_UTF8) != 0; 442465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef COMPILE_PCRE32 442565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar utf_chars[6]; 442665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 442765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else 442865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL utf = FALSE; 442965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 443065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 443165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Helper variables for OP_XCLASS opcode (for characters > 255). We define 443265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichclass_uchardata always so that it can be passed to add_to_class() always, 443365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthough it will not be used in non-UTF 8-bit cases. This avoids having to supply 443465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichalternative calls for the different cases. 
*/ 443565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 443665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *class_uchardata; 443765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 443865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL xclass; 443965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *class_uchardata_base; 444065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 444165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 444265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef PCRE_DEBUG 444365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (lengthptr != NULL) DPRINTF((">> start branch\n")); 444465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 444565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 444665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Set up the default and non-default settings for greediness */ 444765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 444865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichgreedy_default = ((options & PCRE_UNGREEDY) != 0); 444965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichgreedy_non_default = greedy_default ^ 1; 445065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 445165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Initialize no first byte, no required byte. REQ_UNSET means "no char 445265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmatching encountered yet". It gets changed to REQ_NONE if we hit something that 445365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmatches a non-fixed char first char; reqchar just remains unset if we never 445465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfind one. 
445565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 445665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichWhen we hit a repeat whose minimum is zero, we may have to adjust these values 445765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichto take the zero repeat into account. This is implemented by setting them to 445865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichzerofirstbyte and zeroreqchar when such a repeat is encountered. The individual 445965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichitem types that can be repeated set these backoff variables appropriately. */ 446065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 446165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfirstchar = reqchar = zerofirstchar = zeroreqchar = 0; 446265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfirstcharflags = reqcharflags = zerofirstcharflags = zeroreqcharflags = REQ_UNSET; 446365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 446465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* The variable req_caseopt contains either the REQ_CASELESS value 446565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichor zero, according to the current setting of the caseless flag. The 446665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichREQ_CASELESS leaves the lower 28 bit empty. It is added into the 446765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfirstchar or reqchar variables to record the case status of the 446865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvalue. This is used only for ASCII characters. */ 446965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 447065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreq_caseopt = ((options & PCRE_CASELESS) != 0)? 
REQ_CASELESS:0; 447165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 447265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Switch on next character until the end of the branch */ 447365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 447465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;; ptr++) 447565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 447665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL negate_class; 447765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL should_flip_negation; 447865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL possessive_quantifier; 447965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL is_quantifier; 448065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL is_recurse; 448165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL reset_bracount; 448265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int class_has_8bitchar; 448365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int class_one_char; 448465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 448565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL xclass_has_prop; 448665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 448765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int newoptions; 448865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int recno; 448965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int refsign; 449065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int skipbytes; 449165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 subreqchar, subfirstchar; 449265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_int32 subreqcharflags, subfirstcharflags; 449365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int terminator; 449465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich unsigned int mclength; 449565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
unsigned int tempbracount; 449665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 ec; 449765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar mcbuffer[8]; 449865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 449965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Get next character in the pattern */ 450065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 450165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *ptr; 450265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 450365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If we are at the end of a nested substitution, revert to the outer level 450465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string. Nesting only happens one level deep. */ 450565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 450665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == CHAR_NULL && nestptr != NULL) 450765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 450865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr = nestptr; 450965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich nestptr = NULL; 451065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *ptr; 451165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 451265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 451365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If we are in the pre-compile phase, accumulate the length used for the 451465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous cycle of this loop. 
*/ 451565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 451665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr != NULL) 451765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 451865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef PCRE_DEBUG 451965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code > cd->hwm) cd->hwm = code; /* High water info */ 452065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 452165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code > cd->start_workspace + cd->workspace_size - 452265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */ 452365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 452465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR52; 452565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 452665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 452765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 452865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* There is at least one situation where code goes backwards: this is the 452965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case of a zero quantifier after a class (e.g. [ab]{0}). At compile time, 453065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the class is simply eliminated. However, it is created first, so we have to 453165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich allow memory for it. Therefore, don't ever reduce the length at this point. 
453265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich */ 453365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 453465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code < last_code) code = last_code; 453565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 453665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Paranoid check for integer overflow */ 453765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 453865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (OFLOW_MAX - *lengthptr < code - last_code) 453965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 454065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR20; 454165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 454265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 454365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 454465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *lengthptr += (int)(code - last_code); 454565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich DPRINTF(("length=%d added %d c=%c (0x%x)\n", *lengthptr, 454665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (int)(code - last_code), c, c)); 454765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 454865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If "previous" is set and it is not at the start of the work space, move 454965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich it back to there, in order to avoid filling up the work space. Otherwise, 455065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if "previous" is NULL, reset the current code pointer to the start. 
*/ 455165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 455265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (previous != NULL) 455365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 455465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (previous > orig_code) 455565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 455665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memmove(orig_code, previous, IN_UCHARS(code - previous)); 455765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code -= previous - orig_code; 455865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = orig_code; 455965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 456065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 456165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else code = orig_code; 456265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 456365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Remember where this code item starts so we can pick up the length 456465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich next time round. */ 456565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 456665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich last_code = code; 456765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 456865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 456965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In the real compile phase, just check the workspace used by the forward 457065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reference list. 
*/ 457165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 457265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (cd->hwm > cd->start_workspace + cd->workspace_size - 457365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich WORK_SIZE_SAFETY_MARGIN) 457465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 457565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR52; 457665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 457765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 457865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 457965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If in \Q...\E, check for the end; if not, we have a literal */ 458065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 458165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (inescq && c != CHAR_NULL) 458265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 458365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) 458465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 458565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich inescq = FALSE; 458665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 458765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 458865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 458965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 459065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 459165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (previous_callout != NULL) 459265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 459365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr == NULL) /* Don't attempt in pre-compile phase */ 459465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich complete_callout(previous_callout, ptr, cd); 459565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous_callout = NULL; 
459665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 459765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_AUTO_CALLOUT) != 0) 459865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 459965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous_callout = code; 460065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code = auto_callout(code, ptr, cd); 460165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 460265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto NORMAL_CHAR; 460365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 460465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Control does not reach here. */ 460565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 460665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 460765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In extended mode, skip white space and comments. We need a loop in order 460865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich to check for more white space and more comments after a comment. 
*/ 460965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 461065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_EXTENDED) != 0) 461165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 461265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (;;) 461365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 461465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr); 461565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c != CHAR_NUMBER_SIGN) break; 461665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 461765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*ptr != CHAR_NULL) 461865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 461965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */ 462065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { /* IS_NEWLINE sets cd->nllen. */ 462165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr += cd->nllen; 462265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 462365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 462465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 462565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF 462665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf) FORWARDCHAR(ptr); 462765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 462865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 462965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *ptr; /* Either NULL or the char after a newline */ 463065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 463165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 463265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 463365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* See if the next thing is a quantifier. 
*/ 463465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 463565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is_quantifier = 463665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK || 463765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1)); 463865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 463965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fill in length of a previous callout, except when the next thing is a 464065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich quantifier or when processing a property substitution string in UCP mode. */ 464165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 464265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!is_quantifier && previous_callout != NULL && nestptr == NULL && 464365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich after_manual_callout-- <= 0) 464465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 464565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr == NULL) /* Don't attempt in pre-compile phase */ 464665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich complete_callout(previous_callout, ptr, cd); 464765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous_callout = NULL; 464865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 464965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 465065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Create auto callout, except for quantifiers, or while processing property 465165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich strings that are substituted for \w etc in UCP mode. 
*/ 465265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 465365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier && nestptr == NULL) 465465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 465565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous_callout = code; 465665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code = auto_callout(code, ptr, cd); 465765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 465865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 465965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Process the next pattern item. */ 466065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 466165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(c) 466265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 466365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ===================================================================*/ 466465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_NULL: /* The branch terminates at string end */ 466565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_VERTICAL_LINE: /* or | or ) */ 466665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_RIGHT_PARENTHESIS: 466765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *firstcharptr = firstchar; 466865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *firstcharflagsptr = firstcharflags; 466965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *reqcharptr = reqchar; 467065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *reqcharflagsptr = reqcharflags; 467165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *codeptr = code; 467265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *ptrptr = ptr; 467365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr != NULL) 467465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 467565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if 
(OFLOW_MAX - *lengthptr < code - last_code) 467665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 467765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR20; 467865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 467965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 468065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *lengthptr += (int)(code - last_code); /* To include callout length */ 468165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich DPRINTF((">> end branch\n")); 468265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 468365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return TRUE; 468465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 468565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 468665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ===================================================================*/ 468765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle single-character metacharacters. In multiline mode, ^ disables 468865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the setting of any following char as a first character. 
*/ 468965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 469065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_CIRCUMFLEX_ACCENT: 469165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = NULL; 469265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_MULTILINE) != 0) 469365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 469465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags == REQ_UNSET) 469565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstcharflags = firstcharflags = REQ_NONE; 469665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_CIRCM; 469765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 469865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else *code++ = OP_CIRC; 469965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 470065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 470165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_DOLLAR_SIGN: 470265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = NULL; 470365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = ((options & PCRE_MULTILINE) != 0)? OP_DOLLM : OP_DOLL; 470465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 470565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 470665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* There can never be a first char if '.' is first, whatever happens about 470765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeats. The value of reqchar doesn't change either. 
*/ 470865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 470965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_DOT: 471065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; 471165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstchar = firstchar; 471265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstcharflags = firstcharflags; 471365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zeroreqchar = reqchar; 471465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zeroreqcharflags = reqcharflags; 471565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = code; 471665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY; 471765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 471865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 471965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 472065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ===================================================================*/ 472165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Character classes. If the included characters are all < 256, we build a 472265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 32-byte bitmap of the permitted characters, except in the special case 472365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich where there is only one such character. For negated classes, we build the 472465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich map as usual, then invert it at the end. However, we use a different opcode 472565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich so that data characters > 255 can be handled correctly. 
472665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 472765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich If the class contains characters outside the 0-255 range, a different 472865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich opcode is compiled. It may optionally have a bit map for characters < 256, 472965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich but those above are explicitly listed afterwards. A flag byte tells 473065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich whether the bitmap is present, and whether this is a negated class or not. 473165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 473265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich In JavaScript compatibility mode, an isolated ']' causes an error. In 473365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default (Perl) mode, it is treated as a data character. */ 473465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 473565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_RIGHT_SQUARE_BRACKET: 473665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) 473765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 473865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR64; 473965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 474065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 474165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto NORMAL_CHAR; 474265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 474365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In another (POSIX) regex library, the ugly syntax [[:<:]] and [[:>:]] is 474465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich used for "start of word" and "end of word". As these are otherwise illegal 474565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich sequences, we don't break anything by recognizing them. 
They are replaced 474665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich by \b(?=\w) and \b(?<=\w) respectively. Sequences like [a[:<:]] are 474765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich erroneous and are handled by the normal code below. */ 474865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 474965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_LEFT_SQUARE_BRACKET: 475065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (STRNCMP_UC_C8(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0) 475165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 475265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich nestptr = ptr + 7; 475365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr = sub_start_of_word - 1; 475465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 475565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 475665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 475765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (STRNCMP_UC_C8(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0) 475865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 475965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich nestptr = ptr + 7; 476065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr = sub_end_of_word - 1; 476165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 476265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 476365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 476465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle a real character class. */ 476565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 476665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = code; 476765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 476865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* PCRE supports POSIX class stuff inside a class. 
Perl gives an error if 476965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich they are encountered at the top level, so we'll do that too. */ 477065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 477165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || 477265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr[1] == CHAR_EQUALS_SIGN) && 477365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich check_posix_syntax(ptr, &tempptr)) 477465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 477565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31; 477665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 477765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 477865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 477965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the first character is '^', set the negation flag and skip it. Also, 478065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if the first few characters (either before or after ^) are \Q\E or \E we 478165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skip them too. This makes for compatibility with Perl. 
*/ 478265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 478365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich negate_class = FALSE; 478465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (;;) 478565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 478665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *(++ptr); 478765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == CHAR_BACKSLASH) 478865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 478965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[1] == CHAR_E) 479065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 479165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (STRNCMP_UC_C8(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0) 479265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr += 3; 479365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 479465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 479565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 479665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) 479765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich negate_class = TRUE; 479865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else break; 479965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 480065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 480165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Empty classes are allowed in JavaScript compatibility mode. Otherwise, 480265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich an initial ']' is taken as a data character -- the code below handles 480365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich that. In JS mode, [] must always fail, so generate OP_FAIL, whereas 480465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich [^] must match any character, so generate OP_ALLANY. 
*/ 480565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 480665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == CHAR_RIGHT_SQUARE_BRACKET && 480765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) 480865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 480965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = negate_class? OP_ALLANY : OP_FAIL; 481065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; 481165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstchar = firstchar; 481265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstcharflags = firstcharflags; 481365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 481465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 481565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 481665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If a class contains a negative special such as \S, we need to flip the 481765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich negation flag at the end, so that support for characters > 255 works 481865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich correctly (they are all included in the class). */ 481965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 482065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich should_flip_negation = FALSE; 482165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 482265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Extended class (xclass) will be used when characters > 255 482365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich might match. 
*/ 482465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 482565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 482665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich xclass = FALSE; 482765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ 482865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_uchardata_base = class_uchardata; /* Save the start */ 482965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 483065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 483165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For optimization purposes, we track some properties of the class: 483265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_has_8bitchar will be non-zero if the class contains at least one < 483365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 256 character; class_one_char will be 1 if the class contains just one 483465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich character; xclass_has_prop will be TRUE if unicode property checks 483565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich are present in the class. */ 483665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 483765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_has_8bitchar = 0; 483865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_one_char = 0; 483965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 484065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich xclass_has_prop = FALSE; 484165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 484265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 484365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Initialize the 32-char bit map to all zeros. 
We build the map in a 484465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich temporary bit of memory, in case the class contains fewer than two 484565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 8-bit characters because in that case the compiled code doesn't use the bit 484665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich map. */ 484765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 484865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memset(classbits, 0, 32 * sizeof(pcre_uint8)); 484965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 485065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Process characters until ] is reached. By writing this as a "do" it 485165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich means that an initial ] is taken as a data character. At the start of the 485265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich loop, c contains the first byte of the character. */ 485365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 485465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c != CHAR_NULL) do 485565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 485665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *oldptr; 485765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 485865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF 485965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && HAS_EXTRALEN(c)) 486065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { /* Braces are required because the */ 486165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ 486265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 486365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 486465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 486565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 
486665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In the pre-compile phase, accumulate the length of any extra 486765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich data and reset the pointer. This is so that very large classes that 486865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich contain a zillion > 255 characters no longer overwrite the work space 486965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (which is on the stack). We have to remember that there was XCLASS data, 487065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich however. */ 487165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 487265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr != NULL && class_uchardata > class_uchardata_base) 487365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 487465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich xclass = TRUE; 487565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *lengthptr += (int)(class_uchardata - class_uchardata_base); 487665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_uchardata = class_uchardata_base; 487765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 487865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 487965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 488065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Inside \Q...\E everything is literal except \E */ 488165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 488265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (inescq) 488365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 488465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) /* If we are at \E */ 488565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 488665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich inescq = FALSE; /* Reset literal state */ 488765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; /* Skip the 'E' */ 
488865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; /* Carry on with next */ 488965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 489065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto CHECK_RANGE; /* Could be range if \E follows */ 489165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 489265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 489365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle POSIX class names. Perl allows a negation extension of the 489465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich form [:^name:]. A square bracket that doesn't match the syntax is 489565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich treated as a literal. We also recognize the POSIX constructions 489665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich [.ch.] and [=ch=] ("collating elements") and fault them, as Perl 489765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 5.6 and 5.8 do. */ 489865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 489965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == CHAR_LEFT_SQUARE_BRACKET && 490065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || 490165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr)) 490265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 490365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL local_negate = FALSE; 490465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int posix_class, taboffset, tabopt; 490565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich register const pcre_uint8 *cbits = cd->cbits; 490665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint8 pbits[32]; 490765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 490865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[1] != CHAR_COLON) 490965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 
491065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR31; 491165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 491265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 491365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 491465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr += 2; 491565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr == CHAR_CIRCUMFLEX_ACCENT) 491665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 491765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich local_negate = TRUE; 491865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich should_flip_negation = TRUE; /* Note negative special */ 491965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 492065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 492165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 492265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich posix_class = check_posix_name(ptr, (int)(tempptr - ptr)); 492365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (posix_class < 0) 492465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 492565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR30; 492665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 492765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 492865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 492965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If matching is caseless, upper and lower are converted to 493065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich alpha. This relies on the fact that the class table starts with 493165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich alpha, lower, upper as the first 3 entries. 
*/ 493265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 493365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_CASELESS) != 0 && posix_class <= 2) 493465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich posix_class = 0; 493565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 493665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* When PCRE_UCP is set, some of the POSIX classes are converted to 493765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich different escape sequences that use Unicode properties \p or \P. Others 493865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP 493965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich directly. */ 494065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 494165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 494265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_UCP) != 0) 494365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 494465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich unsigned int ptype = 0; 494565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0); 494665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 494765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* The posix_substitutes table specifies which POSIX classes can be 494865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich converted to \p or \P items. 
*/ 494965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 495065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (posix_substitutes[pc] != NULL) 495165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 495265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich nestptr = tempptr + 1; 495365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr = posix_substitutes[pc] - 1; 495465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 495565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 495665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 495765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* There are three other classes that generate special property calls 495865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich that are recognized only in an XCLASS. */ 495965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 496065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else switch(posix_class) 496165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 496265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PC_GRAPH: 496365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptype = PT_PXGRAPH; 496465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through */ 496565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PC_PRINT: 496665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptype == 0) ptype = PT_PXPRINT; 496765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through */ 496865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PC_PUNCT: 496965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptype == 0) ptype = PT_PXPUNCT; 497065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *class_uchardata++ = local_negate? 
XCL_NOTPROP : XCL_PROP; 497165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *class_uchardata++ = ptype; 497265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *class_uchardata++ = 0; 497365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich xclass_has_prop = TRUE; 497465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr = tempptr + 1; 497565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 497665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 497765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For all other POSIX classes, no special action is taken in UCP 497865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich mode. Fall through to the non_UCP case. */ 497965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 498065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 498165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 498265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 498365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 498465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 498565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In the non-UCP case, or when UCP makes no difference, we build the 498665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bit map for the POSIX class in a chunk of local store because we may be 498765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich adding and subtracting from it, and we don't want to subtract bits that 498865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich may be in the main map already. At the end we or the result into the 498965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bit map that is being built. 
*/ 499065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 499165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich posix_class *= 3; 499265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 499365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Copy in the first table (always present) */ 499465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 499565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memcpy(pbits, cbits + posix_class_maps[posix_class], 499665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 32 * sizeof(pcre_uint8)); 499765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 499865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If there is a second table, add or remove it as required. */ 499965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 500065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich taboffset = posix_class_maps[posix_class + 1]; 500165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tabopt = posix_class_maps[posix_class + 2]; 500265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 500365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (taboffset >= 0) 500465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 500565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (tabopt >= 0) 500665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (c = 0; c < 32; c++) pbits[c] |= cbits[c + taboffset]; 500765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 500865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset]; 500965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 501065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 501165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Now see if we need to remove any special characters. An option 501265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich value of 1 removes vertical space and 2 removes underscore. 
*/ 501365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 501465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (tabopt < 0) tabopt = -tabopt; 501565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (tabopt == 1) pbits[1] &= ~0x3c; 501665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (tabopt == 2) pbits[11] &= 0x7f; 501765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 501865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Add the POSIX table or its complement into the main table that is 501965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich being built and we are done. */ 502065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 502165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (local_negate) 502265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c]; 502365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 502465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= pbits[c]; 502565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 502665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr = tempptr + 1; 502765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Every class contains at least one < 256 character. */ 502865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_has_8bitchar = 1; 502965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Every class contains at least two characters. 
*/ 503065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_one_char = 2; 503165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; /* End of POSIX syntax handling */ 503265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 503365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 503465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Backslash may introduce a single character, or it may introduce one 503565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich of the specials, which just set a flag. The sequence \b is a special 503665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case. Inside a class (and only there) it is treated as backspace. We 503765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich assume that other escapes have more than one character in them, so 503865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich speculatively set both class_has_8bitchar and class_one_char bigger 503965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich than one. Unrecognized escapes fall through and are either treated 504065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich as literal characters (by default), or are faulted if 504165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PCRE_EXTRA is set. 
*/ 504265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 504365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == CHAR_BACKSLASH) 504465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 504565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, 504665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich TRUE); 504765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*errorcodeptr != 0) goto FAILED; 504865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (escape == 0) c = ec; 504965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */ 505065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (escape == ESC_N) /* \N is not supported in a class */ 505165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 505265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR71; 505365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 505465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 505565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (escape == ESC_Q) /* Handle start of quoted string */ 505665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 505765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) 505865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 505965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr += 2; /* avoid empty string */ 506065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 506165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else inescq = TRUE; 506265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 506365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 506465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (escape == ESC_E) continue; /* Ignore orphan \E */ 506565de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich 506665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 506765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 506865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich register const pcre_uint8 *cbits = cd->cbits; 506965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Every class contains at least two < 256 characters. */ 507065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_has_8bitchar++; 507165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Every class contains at least two characters. */ 507265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_one_char += 2; 507365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 507465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch (escape) 507565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 507665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 507765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_du: /* These are the values given for \d etc */ 507865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_DU: /* when PCRE_UCP is set. We replace the */ 507965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_wu: /* escape sequence with an appropriate \p */ 508065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_WU: /* or \P to test Unicode properties instead */ 508165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_su: /* of the default ASCII testing. */ 508265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_SU: 508365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich nestptr = ptr; 508465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr = substitutes[escape - ESC_DU] - 1; /* Just before substitute */ 508565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_has_8bitchar--; /* Undo! 
*/ 508665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 508765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 508865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_d: 508965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit]; 509065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 509165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 509265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_D: 509365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich should_flip_negation = TRUE; 509465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit]; 509565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 509665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 509765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_w: 509865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word]; 509965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 510065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 510165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_W: 510265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich should_flip_negation = TRUE; 510365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word]; 510465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 510565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 510665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl 510765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 5.18. Before PCRE 8.34, we had to preserve the VT bit if it was 510865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previously set by something earlier in the character class. 
510965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so 511065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich we could just adjust the appropriate bit. From PCRE 8.34 we no 511165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich longer treat \s and \S specially. */ 511265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 511365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_s: 511465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space]; 511565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 511665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 511765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_S: 511865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich should_flip_negation = TRUE; 511965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space]; 512065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 512165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 512265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* The rest apply in both UCP and non-UCP cases. 
*/ 512365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 512465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_h: 512565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (void)add_list_to_class(classbits, &class_uchardata, options, cd, 512665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PRIV(hspace_list), NOTACHAR); 512765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 512865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 512965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_H: 513065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (void)add_not_list_to_class(classbits, &class_uchardata, options, 513165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd, PRIV(hspace_list)); 513265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 513365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 513465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_v: 513565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (void)add_list_to_class(classbits, &class_uchardata, options, cd, 513665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PRIV(vspace_list), NOTACHAR); 513765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 513865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 513965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_V: 514065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (void)add_not_list_to_class(classbits, &class_uchardata, options, 514165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd, PRIV(vspace_list)); 514265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 514365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 514465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 514565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_p: 514665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case ESC_P: 514765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 
514865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL negated; 514965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich unsigned int ptype = 0, pdata = 0; 515065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr)) 515165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 515265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *class_uchardata++ = ((escape == ESC_p) != negated)? 515365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich XCL_PROP : XCL_NOTPROP; 515465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *class_uchardata++ = ptype; 515565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *class_uchardata++ = pdata; 515665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich xclass_has_prop = TRUE; 515765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_has_8bitchar--; /* Undo! */ 515865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 515965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 516065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 516165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Unrecognized escapes are faulted if PCRE is running in its 516265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich strict mode. By default, for compatibility with Perl, they are 516365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich treated as literals. 
*/ 516465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 516565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 516665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_EXTRA) != 0) 516765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 516865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR7; 516965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 517065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 517165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_has_8bitchar--; /* Undo the speculative increase. */ 517265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_one_char -= 2; /* Undo the speculative increase. */ 517365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *ptr; /* Get the final character and fall through */ 517465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 517565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 517665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 517765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 517865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through if the escape just defined a single character (c >= 0). 517965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich This may be greater than 256. */ 518065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 518165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich escape = 0; 518265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 518365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } /* End of backslash handling */ 518465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 518565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* A character may be followed by '-' to form a range. However, Perl does 518665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich not permit ']' to be the end of the range. 
A '-' character at the end is 518765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich treated as a literal. Perl ignores orphaned \E sequences entirely. The 518865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code for handling \Q and \E is messy. */ 518965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 519065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHECK_RANGE: 519165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) 519265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 519365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich inescq = FALSE; 519465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr += 2; 519565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 519665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich oldptr = ptr; 519765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 519865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Remember if \r or \n were explicitly used */ 519965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 520065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF; 520165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 520265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check for range */ 520365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 520465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!inescq && ptr[1] == CHAR_MINUS) 520565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 520665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 d; 520765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr += 2; 520865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2; 520965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 521065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If we hit \Q (not followed by \E) at this point, go into 
escaped 521165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich mode. */ 521265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 521365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q) 521465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 521565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr += 2; 521665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) 521765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { ptr += 2; continue; } 521865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich inescq = TRUE; 521965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 522065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 522165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 522265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Minus (hyphen) at the end of a class is treated as a literal, so put 522365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich back the pointer and jump to handle the character that preceded it. 
*/ 522465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 522565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr == CHAR_NULL || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET)) 522665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 522765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr = oldptr; 522865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto CLASS_SINGLE_CHARACTER; 522965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 523065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 523165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Otherwise, we have a potential range; pick up the next character */ 523265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 523365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF 523465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf) 523565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { /* Braces are required because the */ 523665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */ 523765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 523865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 523965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 524065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich d = *ptr; /* Not UTF-8 mode */ 524165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 524265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* The second part of a range can be a single-character escape 524365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich sequence, but not any of the other escapes. Perl treats a hyphen as a 524465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich literal in such circumstances. 
However, in Perl's warning mode, a 524565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich warning is given, so PCRE now faults it as it is almost certainly a 524665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich mistake on the user's part. */ 524765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 524865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!inescq) 524965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 525065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (d == CHAR_BACKSLASH) 525165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 525265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int descape; 525365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich descape = check_escape(&ptr, &d, errorcodeptr, cd->bracount, options, TRUE); 525465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*errorcodeptr != 0) goto FAILED; 525565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 525665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* 0 means a character was put into d; \b is backspace; any other 525765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich special causes an error. 
*/ 525865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 525965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (descape != 0) 526065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 526165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (descape == ESC_b) d = CHAR_BS; else 526265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 526365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR83; 526465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 526565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 526665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 526765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 526865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 526965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* A hyphen followed by a POSIX class is treated in the same way. */ 527065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 527165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (d == CHAR_LEFT_SQUARE_BRACKET && 527265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || 527365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr[1] == CHAR_EQUALS_SIGN) && 527465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich check_posix_syntax(ptr, &tempptr)) 527565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 527665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR83; 527765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 527865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 527965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 528065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 528165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check that the two values are in the correct order. Optimize 528265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich one-character ranges. 
*/ 528365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 528465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (d < c) 528565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 528665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR8; 528765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 528865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 528965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (d == c) goto CLASS_SINGLE_CHARACTER; /* A few lines below */ 529065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 529165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* We have found a character range, so single character optimizations 529265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cannot be done anymore. Any value greater than 1 indicates that there 529365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is more than one character. */ 529465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 529565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_one_char = 2; 529665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 529765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Remember an explicit \r or \n, and add the range to the class. 
*/ 529865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 529965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF; 530065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 530165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_has_8bitchar += 530265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich add_to_class(classbits, &class_uchardata, options, cd, c, d); 530365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 530465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; /* Go get the next char in the class */ 530565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 530665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 530765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle a single character - we can get here for a normal non-escape 530865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich char, or after \ that introduces a single character or for an apparent 530965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich range that isn't. Only the value 1 matters for class_one_char, so don't 531065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich increase it if it is already 2 or more ... just in case there's a class 531165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich with a zillion characters in it. */ 531265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 531365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CLASS_SINGLE_CHARACTER: 531465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (class_one_char < 2) class_one_char++; 531565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 531665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If class_one_char is 1, we have the first single character in the 531765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class, and there have been no prior ranges, or XCLASS items generated by 531865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich escapes. 
If this is the final character in the class, we can optimize by 531965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich turning the item into a 1-character OP_CHAR[I] if it's positive, or 532065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_NOT[I] if it's negative. In the positive case, it can cause firstchar 532165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich to be set. Otherwise, there can be no first char if this item is first, 532265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich whatever repeat count may follow. In the case of reqchar, save the 532365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous value for reinstating. */ 532465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 532565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) 532665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 532765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 532865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zeroreqchar = reqchar; 532965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zeroreqcharflags = reqcharflags; 533065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 533165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (negate_class) 533265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 533365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 533465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int d; 533565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 533665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; 533765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstchar = firstchar; 533865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstcharflags = firstcharflags; 533965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 534065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For 
caseless UTF-8 mode when UCP support is available, check 534165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich whether this character has more than one other case. If so, generate 534265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich a special OP_NOTPROP item instead of OP_NOTI. */ 534365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 534465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 534565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && (options & PCRE_CASELESS) != 0 && 534665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (d = UCD_CASESET(c)) != 0) 534765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 534865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_NOTPROP; 534965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = PT_CLIST; 535065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = d; 535165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 535265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 535365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 535465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Char has only one other case, or UCP not available */ 535565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 535665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 535765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = ((options & PCRE_CASELESS) != 0)? 
OP_NOTI: OP_NOT; 535865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 535965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) 536065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += PRIV(ord2utf)(c, code); 536165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 536265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 536365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = c; 536465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 536565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 536665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* We are finished with this character class */ 536765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 536865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto END_CLASS; 536965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 537065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 537165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For a single, positive character, get the value into mcbuffer, and 537265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich then we can handle this with the normal one-character code. 
*/ 537365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 537465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 537565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) 537665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich mclength = PRIV(ord2utf)(c, mcbuffer); 537765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 537865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 537965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 538065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich mcbuffer[0] = c; 538165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich mclength = 1; 538265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 538365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto ONE_CHAR; 538465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } /* End of 1-char optimization */ 538565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 538665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* There is more than one character in the class, or an XCLASS item 538765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich has been generated. Add this character to the class. */ 538865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 538965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_has_8bitchar += 539065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich add_to_class(classbits, &class_uchardata, options, cd, c, c); 539165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 539265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 539365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Loop until ']' reached. This "while" is the end of the "do" far above. 539465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich If we are at the end of an internal nested string, revert to the outer 539565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich string. 
*/ 539665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 539765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (((c = *(++ptr)) != CHAR_NULL || 539865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (nestptr != NULL && 539965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (ptr = nestptr, nestptr = NULL, c = *(++ptr)) != CHAR_NULL)) && 540065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (c != CHAR_RIGHT_SQUARE_BRACKET || inescq)); 540165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 540265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check for missing terminating ']' */ 540365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 540465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c == CHAR_NULL) 540565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 540665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR6; 540765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 540865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 540965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 541065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* We will need an XCLASS if data has been placed in class_uchardata. In 541165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the second phase this is a sufficient test. However, in the pre-compile 541265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich phase, class_uchardata gets emptied to prevent workspace overflow, so 541365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich only if the very last character in the class needs XCLASS will it contain 541465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich anything at this point.
For this reason, xclass gets set TRUE above when 541565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich class_uchardata is emptied, and that's why this code is the way it is here 541665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich instead of just doing a test on class_uchardata below. */ 541765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 541865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 541965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (class_uchardata > class_uchardata_base) xclass = TRUE; 542065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 542165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 542265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If this is the first thing in the branch, there can be no first char 542365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich setting, whatever the repeat count. Any reqchar setting must remain 542465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich unchanged after any kind of repeat.
*/ 542565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 542665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; 542765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstchar = firstchar; 542865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstcharflags = firstcharflags; 542965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zeroreqchar = reqchar; 543065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zeroreqcharflags = reqcharflags; 543165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 543265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If there are characters with values > 255, we have to compile an 543365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich extended class, with its own opcode, unless there was a negated special 543465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich such as \S in the class, and PCRE_UCP is not set, because in that case all 543565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich characters > 255 are in the class, so any that were explicitly given as 543665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich well can be ignored. If (when there are explicit characters > 255 that must 543765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich be listed) there are no characters < 256, we can omit the bitmap in the 543865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich actual compiled code. 
*/ 543965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 544065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF 544165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (xclass && (!should_flip_negation || (options & PCRE_UCP) != 0)) 544265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif !defined COMPILE_PCRE8 544365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (xclass && !should_flip_negation) 544465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 544565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 544665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 544765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *class_uchardata++ = XCL_END; /* Marks the end of extra data */ 544865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_XCLASS; 544965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += LINK_SIZE; 545065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = negate_class? XCL_NOT:0; 545165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (xclass_has_prop) *code |= XCL_HASPROP; 545265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 545365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the map is required, move up the extra data to make room for it; 545465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich otherwise just move the code pointer to the end of the extra data. 
*/ 545565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 545665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (class_has_8bitchar > 0) 545765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 545865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ |= XCL_MAP; 545965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memmove(code + (32 / sizeof(pcre_uchar)), code, 546065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich IN_UCHARS(class_uchardata - code)); 546165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (negate_class && !xclass_has_prop) 546265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (c = 0; c < 32; c++) classbits[c] = ~classbits[c]; 546365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memcpy(code, classbits, 32); 546465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code = class_uchardata + (32 / sizeof(pcre_uchar)); 546565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 546665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else code = class_uchardata; 546765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 546865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Now fill in the complete length of the item */ 546965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 547065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(previous, 1, (int)(code - previous)); 547165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; /* End of class handling */ 547265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 547365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 547465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 547565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If there are no characters > 255, or they are all to be included or 547665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the 547765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich whole class was 
negated and whether there were negative specials such as \S 547865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (non-UCP) in the class. Then copy the 32-byte map into the code vector, 547965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich negating it if necessary. */ 548065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 548165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; 548265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr == NULL) /* Save time in the pre-compile phase */ 548365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 548465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (negate_class) 548565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (c = 0; c < 32; c++) classbits[c] = ~classbits[c]; 548665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memcpy(code, classbits, 32); 548765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 548865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 32 / sizeof(pcre_uchar); 548965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 549065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich END_CLASS: 549165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 549265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 549365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 549465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ===================================================================*/ 549565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Various kinds of repeat; '{' is not necessarily a quantifier, but this 549665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich has been tested above. 
*/ 549765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 549865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_LEFT_CURLY_BRACKET: 549965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!is_quantifier) goto NORMAL_CHAR; 550065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr); 550165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*errorcodeptr != 0) goto FAILED; 550265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto REPEAT; 550365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 550465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_ASTERISK: 550565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat_min = 0; 550665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat_max = -1; 550765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto REPEAT; 550865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 550965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_PLUS: 551065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat_min = 1; 551165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat_max = -1; 551265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto REPEAT; 551365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 551465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_QUESTION_MARK: 551565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat_min = 0; 551665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat_max = 1; 551765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 551865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich REPEAT: 551965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (previous == NULL) 552065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 552165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR9; 552265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 
552365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 552465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 552565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_min == 0) 552665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 552765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstchar = zerofirstchar; /* Adjust for zero repeat */ 552865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstcharflags = zerofirstcharflags; 552965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqchar = zeroreqchar; /* Ditto */ 553065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharflags = zeroreqcharflags; 553165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 553265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 553365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Remember whether this is a variable length repeat */ 553465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 553565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY; 553665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 553765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich op_type = 0; /* Default single-char op codes */ 553865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich possessive_quantifier = FALSE; /* Default not possessive quantifier */ 553965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 554065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Save start of previous item, in case we have to move it up in order to 554165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich insert something before it. 
*/ 554265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 554365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempcode = previous; 554465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 554565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Before checking for a possessive quantifier, we must skip over 554665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich whitespace and comments in extended mode because Perl allows white space at 554765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich this point. */ 554865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 554965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_EXTENDED) != 0) 555065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 555165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *p = ptr + 1; 555265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (;;) 555365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 555465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (MAX_255(*p) && (cd->ctypes[*p] & ctype_space) != 0) p++; 555565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*p != CHAR_NUMBER_SIGN) break; 555665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p++; 555765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*p != CHAR_NULL) 555865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 555965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (IS_NEWLINE(p)) /* For non-fixed-length newline cases, */ 556065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { /* IS_NEWLINE sets cd->nllen. 
*/ 556165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p += cd->nllen; 556265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 556365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 556465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p++; 556565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF 556665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf) FORWARDCHAR(p); 556765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 556865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } /* Loop for comment characters */ 556965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } /* Loop for multiple comments */ 557065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr = p - 1; /* Character before the next significant one. */ 557165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 557265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 557365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the next character is '+', we have a possessive quantifier. This 557465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich implies greediness, whatever the setting of the PCRE_UNGREEDY option. 557565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich If the next character is '?' this is a minimizing repeat, by default, 557665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich but if PCRE_UNGREEDY is set, it works the other way round. We change the 557765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat type to the non-default. 
*/ 557865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 557965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[1] == CHAR_PLUS) 558065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 558165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat_type = 0; /* Force greedy */ 558265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich possessive_quantifier = TRUE; 558365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 558465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 558565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (ptr[1] == CHAR_QUESTION_MARK) 558665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 558765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat_type = greedy_non_default; 558865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 558965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 559065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else repeat_type = greedy_default; 559165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 559265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If previous was a recursion call, wrap it in atomic brackets so that 559365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous becomes the atomic group. All recursions were so wrapped in the 559465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich past, but it no longer happens for non-repeated recursions. In fact, the 559565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeated ones could be re-implemented independently so as not to need this, 559665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich but for the moment we rely on the code for repeating groups. 
*/ 559765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 559865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*previous == OP_RECURSE) 559965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 560065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE)); 560165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *previous = OP_ONCE; 560265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(previous, 1, 2 + 2*LINK_SIZE); 560365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous[2 + 2*LINK_SIZE] = OP_KET; 560465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE); 560565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 2 + 2 * LINK_SIZE; 560665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich length_prevgroup = 3 + 3*LINK_SIZE; 560765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 560865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* When actually compiling, we need to check whether this was a forward 560965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reference, and if so, adjust the offset. 
*/ 561065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 561165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr == NULL && cd->hwm >= cd->start_workspace + LINK_SIZE) 561265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 561365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int offset = GET(cd->hwm, -LINK_SIZE); 561465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (offset == previous + 1 - cd->start_code) 561565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(cd->hwm, -LINK_SIZE, offset + 1 + LINK_SIZE); 561665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 561765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 561865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 561965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Now handle repetition for the different types of item. */ 562065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 562165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If previous was a character or negated character match, abolish the item 562265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich and generate a repeat item instead. If a char item has a minimum of more 562365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich than one, ensure that it is set in reqchar - it might not be if a sequence 562465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich such as x{3} is the first thing in a branch because the x will have gone 562565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich into firstchar instead. 
*/ 562665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 562765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*previous == OP_CHAR || *previous == OP_CHARI 562865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich || *previous == OP_NOT || *previous == OP_NOTI) 562965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 563065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch (*previous) 563165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 563265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: /* Make compiler happy. */ 563365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHAR: op_type = OP_STAR - OP_STAR; break; 563465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHARI: op_type = OP_STARI - OP_STAR; break; 563565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT: op_type = OP_NOTSTAR - OP_STAR; break; 563665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTI: op_type = OP_NOTSTARI - OP_STAR; break; 563765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 563865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 563965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Deal with UTF characters that take up more than one character. It's 564065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich easier to write this out separately than try to macrify it. Use c to 564165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich hold the length of the character in bytes, plus UTF_LENGTH to flag that 564265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich it's a length rather than a small character. 
*/ 564365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 564465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 564565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && NOT_FIRSTCHAR(code[-1])) 564665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 564765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *lastchar = code - 1; 564865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BACKCHAR(lastchar); 564965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = (int)(code - lastchar); /* Length of UTF-8 character */ 565065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */ 565165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c |= UTF_LENGTH; /* Flag c as a length */ 565265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 565365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 565465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif /* SUPPORT_UTF */ 565565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 565665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle the case of a single character - either with no UTF support, or 565765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich with UTF disabled, or for a single character UTF character. 
*/ 565865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 565965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = code[-1]; 566065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*previous <= OP_CHARI && repeat_min > 1) 566165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 566265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqchar = c; 566365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharflags = req_caseopt | cd->req_varyopt; 566465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 566565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 566665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 566765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ 566865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 566965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 567065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If previous was a character type match (\d or similar), abolish it and 567165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich create a suitable repeat item. The code is shared with single-character 567265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeats by setting op_type to add a suitable offset into repeat_type. Note 567365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich that the Unicode property types will be present only when SUPPORT_UCP is 567465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich defined, but we don't wrap the little bits of code here because it just 567565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich makes it horribly messy. 
*/ 567665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 567765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (*previous < OP_EODN) 567865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 567965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *oldcode; 568065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int prop_type, prop_value; 568165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ 568265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = *previous; 568365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 568465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OUTPUT_SINGLE_REPEAT: 568565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*previous == OP_PROP || *previous == OP_NOTPROP) 568665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 568765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich prop_type = previous[1]; 568865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich prop_value = previous[2]; 568965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 569065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else prop_type = prop_value = -1; 569165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 569265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich oldcode = code; 569365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code = previous; /* Usually overwrite previous item */ 569465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 569565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the maximum is zero then the minimum must also be zero; Perl allows 569665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich this case, so we do too - by simply omitting the item altogether. 
*/ 569765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 569865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_max == 0) goto END_REPEAT; 569965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 570065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Combine the op_type with the repeat_type */ 570165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 570265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat_type += op_type; 570365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 570465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* A minimum of zero is handled either as the special case * or ?, or as 570565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich an UPTO, with the maximum given. */ 570665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 570765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_min == 0) 570865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 570965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_max == -1) *code++ = OP_STAR + repeat_type; 571065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type; 571165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 571265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 571365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_UPTO + repeat_type; 571465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2INC(code, 0, repeat_max); 571565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 571665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 571765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 571865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* A repeat minimum of 1 is optimized into some special cases. If the 571965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich maximum is unlimited, we use OP_PLUS. 
Otherwise, the original item is 572065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich left in place and, if the maximum is greater than 1, we use OP_UPTO with 572165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich one less than the maximum. */ 572265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 572365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (repeat_min == 1) 572465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 572565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_max == -1) 572665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_PLUS + repeat_type; 572765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 572865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 572965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code = oldcode; /* leave previous item in place */ 573065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_max == 1) goto END_REPEAT; 573165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_UPTO + repeat_type; 573265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2INC(code, 0, repeat_max - 1); 573365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 573465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 573565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 573665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* The case {n,n} is just an EXACT, while the general case {n,m} is 573765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich handled as an EXACT followed by an UPTO. 
*/ 573865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 573965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 574065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 574165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */ 574265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2INC(code, 0, repeat_min); 574365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 574465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the maximum is unlimited, insert an OP_STAR. Before doing so, 574565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich we have to insert the character for the previous code. For a repeated 574665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Unicode property match, there are two extra bytes that define the 574765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich required property. In UTF-8 mode, long characters have their length in 574865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c, with the UTF_LENGTH bit as a flag. 
*/ 574965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 575065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_max < 0) 575165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 575265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 575365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && (c & UTF_LENGTH) != 0) 575465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 575565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memcpy(code, utf_chars, IN_UCHARS(c & 7)); 575665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += c & 7; 575765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 575865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 575965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 576065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 576165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = c; 576265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (prop_type >= 0) 576365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 576465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = prop_type; 576565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = prop_value; 576665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 576765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 576865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_STAR + repeat_type; 576965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 577065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 577165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Else insert an UPTO if the max is greater than the min, again 577265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich preceded by the character, for the previously inserted code. If the 577365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich UPTO is just for 1 instance, we can use QUERY instead. 
*/ 577465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 577565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (repeat_max != repeat_min) 577665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 577765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 577865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && (c & UTF_LENGTH) != 0) 577965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 578065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memcpy(code, utf_chars, IN_UCHARS(c & 7)); 578165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += c & 7; 578265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 578365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 578465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 578565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = c; 578665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (prop_type >= 0) 578765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 578865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = prop_type; 578965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = prop_value; 579065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 579165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat_max -= repeat_min; 579265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 579365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_max == 1) 579465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 579565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_QUERY + repeat_type; 579665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 579765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 579865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 579965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_UPTO + repeat_type; 580065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
PUT2INC(code, 0, repeat_max); 580165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 580265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 580365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 580465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 580565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* The character or character type itself comes last in all cases. */ 580665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 580765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 580865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && (c & UTF_LENGTH) != 0) 580965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 581065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memcpy(code, utf_chars, IN_UCHARS(c & 7)); 581165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += c & 7; 581265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 581365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 581465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 581565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = c; 581665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 581765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For a repeated Unicode property match, there are two extra bytes that 581865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich define the required property. 
*/ 581965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 582065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 582165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (prop_type >= 0) 582265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 582365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = prop_type; 582465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = prop_value; 582565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 582665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 582765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 582865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 582965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If previous was a character class or a back reference, we put the repeat 583065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich stuff after it, but just skip the item if the repeat was {0,0}. */ 583165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 583265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (*previous == OP_CLASS || *previous == OP_NCLASS || 583365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 583465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *previous == OP_XCLASS || 583565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 583665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *previous == OP_REF || *previous == OP_REFI || 583765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *previous == OP_DNREF || *previous == OP_DNREFI) 583865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 583965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_max == 0) 584065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 584165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code = previous; 584265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto END_REPEAT; 584365de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich } 584465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 584565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_min == 0 && repeat_max == -1) 584665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_CRSTAR + repeat_type; 584765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (repeat_min == 1 && repeat_max == -1) 584865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_CRPLUS + repeat_type; 584965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (repeat_min == 0 && repeat_max == 1) 585065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_CRQUERY + repeat_type; 585165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 585265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 585365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_CRRANGE + repeat_type; 585465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2INC(code, 0, repeat_min); 585565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */ 585665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2INC(code, 0, repeat_max); 585765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 585865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 585965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 586065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If previous was a bracket group, we may have to replicate it in certain 586165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cases. Note that at this point we can encounter only the "basic" bracket 586265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich opcodes such as BRA and CBRA, as this is the place where they get converted 586365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich into the more special varieties such as BRAPOS and SBRA. 
A test for >= 586465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_ASSERT and <= OP_COND includes ASSERT, ASSERT_NOT, ASSERTBACK, 586565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ASSERTBACK_NOT, ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND. 586665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Originally, PCRE did not allow repetition of assertions, but now it does, 586765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for Perl compatibility. */ 586865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 586965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (*previous >= OP_ASSERT && *previous <= OP_COND) 587065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 587165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich register int i; 587265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int len = (int)(code - previous); 587365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *bralink = NULL; 587465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *brazeroptr = NULL; 587565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 587665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so 587765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich we just ignore the repeat. */ 587865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 587965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_DEF) 588065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto END_REPEAT; 588165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 588265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* There is no sense in actually repeating assertions. The only potential 588365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich use of repetition is in cases when the assertion is optional. 
Therefore, 588465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if the minimum is greater than zero, just ignore the repeat. If the 588565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich maximum is not zero or one, set it to 1. */ 588665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 588765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*previous < OP_ONCE) /* Assertion */ 588865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 588965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_min > 0) goto END_REPEAT; 589065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_max < 0 || repeat_max > 1) repeat_max = 1; 589165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 589265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 589365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* The case of a zero minimum is special because of the need to stick 589465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_BRAZERO in front of it, and because the group appears once in the 589565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich data, whereas in other cases it appears the minimum number of times. For 589665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich this reason, it is simplest to treat this case separately, as otherwise 589765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the code gets far too messy. There are several special subcases when the 589865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich minimum is zero. 
*/ 589965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 590065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_min == 0) 590165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 590265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the maximum is also zero, we used to just omit the group from the 590365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich output altogether, like this: 590465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 590565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ** if (repeat_max == 0) 590665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ** { 590765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ** code = previous; 590865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ** goto END_REPEAT; 590965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ** } 591065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 591165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich However, that fails when a group or a subgroup within it is referenced 591265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich as a subroutine from elsewhere in the pattern, so now we stick in 591365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_SKIPZERO in front of it so that it is skipped on execution. As we 591465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich don't have a list of which groups are referenced, we cannot do this 591565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich selectively. 591665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 591765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich If the maximum is 1 or unlimited, we just have to stick in the BRAZERO 591865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich and do no more at this point. 
However, we do need to adjust any 591965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_RECURSE calls inside the group that refer to the group itself or any 592065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich internal or forward referenced group, because the offset is from the 592165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich start of the whole regex. Temporarily terminate the pattern while doing 592265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich this. */ 592365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 592465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_max <= 1) /* Covers 0, 1, and unlimited */ 592565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 592665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = OP_END; 592765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich adjust_recurse(previous, 1, utf, cd, save_hwm); 592865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memmove(previous + 1, previous, IN_UCHARS(len)); 592965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code++; 593065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_max == 0) 593165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 593265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *previous++ = OP_SKIPZERO; 593365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto END_REPEAT; 593465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 593565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich brazeroptr = previous; /* Save for possessive optimizing */ 593665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *previous++ = OP_BRAZERO + repeat_type; 593765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 593865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 593965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the maximum is greater than 1 and limited, we have to replicate 594065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich in a nested fashion, 
sticking OP_BRAZERO before each set of brackets. 594165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich The first one has to be handled carefully because it's the original 594265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich copy, which has to be moved up. The remainder can be handled by code 594365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich that is common with the non-zero minimum case below. We have to 594465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich adjust the value of repeat_max, since one less copy is required. Once 594565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich again, we may have to adjust any OP_RECURSE calls inside the group. */ 594665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 594765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 594865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 594965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int offset; 595065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = OP_END; 595165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm); 595265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len)); 595365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 2 + LINK_SIZE; 595465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *previous++ = OP_BRAZERO + repeat_type; 595565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *previous++ = OP_BRA; 595665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 595765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* We chain together the bracket offset fields that have to be 595865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich filled in later when the ends of the brackets are reached. */ 595965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 596065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich offset = (bralink == NULL)? 
0 : (int)(previous - bralink); 596165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bralink = previous; 596265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUTINC(previous, 0, offset); 596365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 596465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 596565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat_max--; 596665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 596765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 596865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the minimum is greater than zero, replicate the group as many 596965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich times as necessary, and adjust the maximum to the number of subsequent 597065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich copies that we need. If we set a first char from the group, and didn't 597165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich set a required char, copy the latter from the former. If there are any 597265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich forward reference subroutine calls in the group, there will be entries on 597365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the workspace list; replicate these with an appropriate increment. */ 597465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 597565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 597665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 597765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_min > 1) 597865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 597965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In the pre-compile phase, we don't actually do the replication. We 598065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich just adjust the length as if we had. Do some paranoid checks for 598165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich potential integer overflow. 
The INT64_OR_DOUBLE type is a 64-bit 598265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich integer type when available, otherwise double. */ 598365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 598465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr != NULL) 598565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 598665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int delta = (repeat_min - 1)*length_prevgroup; 598765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((INT64_OR_DOUBLE)(repeat_min - 1)* 598865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (INT64_OR_DOUBLE)length_prevgroup > 598965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (INT64_OR_DOUBLE)INT_MAX || 599065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OFLOW_MAX - *lengthptr < delta) 599165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 599265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR20; 599365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 599465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 599565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *lengthptr += delta; 599665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 599765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 599865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* This is compiling for real. If there is a set first byte for 599965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the group, and we have not yet set a "required byte", set it. Make 600065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich sure there is enough workspace for copying forward references before 600165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich doing the copy. 
*/ 600265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 600365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 600465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 600565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (groupsetfirstchar && reqcharflags < 0) 600665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 600765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqchar = firstchar; 600865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharflags = firstcharflags; 600965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 601065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 601165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (i = 1; i < repeat_min; i++) 601265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 601365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *hc; 601465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *this_hwm = cd->hwm; 601565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memcpy(code, previous, IN_UCHARS(len)); 601665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 601765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (cd->hwm > cd->start_workspace + cd->workspace_size - 601865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm)) 601965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 602065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich size_t save_offset = save_hwm - cd->start_workspace; 602165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich size_t this_offset = this_hwm - cd->start_workspace; 602265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = expand_workspace(cd); 602365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*errorcodeptr != 0) goto FAILED; 602465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich save_hwm = (pcre_uchar *)cd->start_workspace + save_offset; 602565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
this_hwm = (pcre_uchar *)cd->start_workspace + this_offset; 602665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 602765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 602865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE) 602965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 603065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(cd->hwm, 0, GET(hc, 0) + len); 603165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->hwm += LINK_SIZE; 603265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 603365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich save_hwm = this_hwm; 603465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += len; 603565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 603665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 603765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 603865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 603965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_max > 0) repeat_max -= repeat_min; 604065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 604165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 604265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* This code is common to both the zero and non-zero minimum cases. If 604365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the maximum is limited, it replicates the group in a nested fashion, 604465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich remembering the bracket starts on a stack. In the case of a zero minimum, 604565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the first one was set up above. In all cases the repeat_max now specifies 604665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the number of additional copies needed. Again, we must remember to 604765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich replicate entries on the forward reference list. 
*/ 604865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 604965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_max >= 0) 605065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 605165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In the pre-compile phase, we don't actually do the replication. We 605265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich just adjust the length as if we had. For each repetition we must add 1 605365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich to the length for BRAZERO and for all but the last repetition we must 605465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some 605565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is 605665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich a 64-bit integer type when available, otherwise double. */ 605765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 605865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr != NULL && repeat_max > 0) 605965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 606065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) - 606165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 2 - 2*LINK_SIZE; /* Last one doesn't nest */ 606265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((INT64_OR_DOUBLE)repeat_max * 606365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE) 606465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich > (INT64_OR_DOUBLE)INT_MAX || 606565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OFLOW_MAX - *lengthptr < delta) 606665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 606765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR20; 606865de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich goto FAILED; 606965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 607065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *lengthptr += delta; 607165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 607265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 607365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* This is compiling for real */ 607465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 607565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else for (i = repeat_max - 1; i >= 0; i--) 607665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 607765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *hc; 607865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *this_hwm = cd->hwm; 607965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 608065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_BRAZERO + repeat_type; 608165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 608265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* All but the final copy start a new nesting, maintaining the 608365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich chain of brackets outstanding. */ 608465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 608565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (i != 0) 608665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 608765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int offset; 608865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_BRA; 608965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich offset = (bralink == NULL)? 
0 : (int)(code - bralink); 609065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bralink = code; 609165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUTINC(code, 0, offset); 609265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 609365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 609465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memcpy(code, previous, IN_UCHARS(len)); 609565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 609665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Ensure there is enough workspace for forward references before 609765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich copying them. */ 609865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 609965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (cd->hwm > cd->start_workspace + cd->workspace_size - 610065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm)) 610165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 610265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich size_t save_offset = save_hwm - cd->start_workspace; 610365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich size_t this_offset = this_hwm - cd->start_workspace; 610465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = expand_workspace(cd); 610565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*errorcodeptr != 0) goto FAILED; 610665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich save_hwm = (pcre_uchar *)cd->start_workspace + save_offset; 610765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich this_hwm = (pcre_uchar *)cd->start_workspace + this_offset; 610865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 610965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 611065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE) 611165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 
611265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1)); 611365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->hwm += LINK_SIZE; 611465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 611565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich save_hwm = this_hwm; 611665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += len; 611765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 611865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 611965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Now chain through the pending brackets, and fill in their length 612065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich fields (which are holding the chain links pro tem). */ 612165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 612265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (bralink != NULL) 612365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 612465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int oldlinkoffset; 612565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int offset = (int)(code - bralink + 1); 612665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *bra = code - offset; 612765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich oldlinkoffset = GET(bra, 1); 612865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bralink = (oldlinkoffset == 0)? 
NULL : bralink - oldlinkoffset; 612965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_KET; 613065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUTINC(code, 0, offset); 613165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(bra, 1, offset); 613265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 613365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 613465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 613565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the maximum is unlimited, set a repeater in the final copy. For 613665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ONCE brackets, that's all we need to do. However, possessively repeated 613765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ONCE brackets can be converted into non-capturing brackets, as the 613865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to 613965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich deal with possessive ONCEs specially. 614065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 614165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Otherwise, when we are doing the actual compile phase, check to see 614265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich whether this group is one that could match an empty string. If so, 614365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so 614465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich that runtime checking can be done. [This check is also applied to ONCE 614565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich groups at runtime, but in a different way.] 
614665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 614765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Then, if the quantifier was possessive and the bracket is not a 614865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich conditional, we convert the BRA code to the POS form, and the KET code to 614965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich KETRPOS. (It turns out to be convenient at runtime to detect this kind of 615065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich subpattern at both the start and at the end.) The use of special opcodes 615165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich makes it possible to reduce greatly the stack usage in pcre_exec(). If 615265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO. 615365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 615465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Then, if the minimum number of matches is 1 or 0, cancel the possessive 615565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich flag so that the default action below, of wrapping everything inside 615665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich atomic brackets, does not happen. When the minimum is greater than 1, 615765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich there will be earlier copies of the group, and so we still have to wrap 615865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the whole thing. 
*/ 615965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 616065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 616165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 616265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *ketcode = code - 1 - LINK_SIZE; 616365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *bracode = ketcode - GET(ketcode, 1); 616465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 616565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Convert possessive ONCE brackets to non-capturing */ 616665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 616765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) && 616865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich possessive_quantifier) *bracode = OP_BRA; 616965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 617065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For non-possessive ONCE brackets, all we need to do is to 617165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich set the KET. */ 617265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 617365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC) 617465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *ketcode = OP_KETRMAX + repeat_type; 617565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 617665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle non-ONCE brackets and possessive ONCEs (which have been 617765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich converted to non-capturing above). */ 617865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 617965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 618065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 618165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In the compile phase, check for empty string matching. 
*/ 618265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 618365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr == NULL) 618465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 618565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *scode = bracode; 618665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do 618765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 618865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (could_be_empty_branch(scode, ketcode, utf, cd, NULL)) 618965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 619065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *bracode += OP_SBRA - OP_BRA; 619165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 619265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 619365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich scode += GET(scode, 1); 619465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 619565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*scode == OP_ALT); 619665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 619765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 619865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle possessive quantifiers. */ 619965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 620065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (possessive_quantifier) 620165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 620265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For COND brackets, we wrap the whole thing in a possessively 620365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeated non-capturing bracket, because we have not invented POS 620465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich versions of the COND opcodes. Because we are moving code along, we 620565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich must ensure that any pending recursive references are updated. 
*/ 620665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 620765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*bracode == OP_COND || *bracode == OP_SCOND) 620865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 620965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int nlen = (int)(code - bracode); 621065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = OP_END; 621165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm); 621265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen)); 621365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 1 + LINK_SIZE; 621465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich nlen += 1 + LINK_SIZE; 621565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *bracode = OP_BRAPOS; 621665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_KETRPOS; 621765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUTINC(code, 0, nlen); 621865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(bracode, 1, nlen); 621965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 622065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 622165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For non-COND brackets, we modify the BRA code and use KETRPOS. 
*/ 622265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 622365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 622465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 622565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *bracode += 1; /* Switch to xxxPOS opcodes */ 622665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *ketcode = OP_KETRPOS; 622765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 622865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 622965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the minimum is zero, mark it as possessive, then unset the 623065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich possessive flag when the minimum is 0 or 1. */ 623165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 623265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO; 623365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repeat_min < 2) possessive_quantifier = FALSE; 623465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 623565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 623665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Non-possessive quantifier */ 623765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 623865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else *ketcode = OP_KETRMAX + repeat_type; 623965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 624065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 624165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 624265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 624365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If previous is OP_FAIL, it was generated by an empty class [] in 624465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich JavaScript mode. The other ways in which OP_FAIL can be generated, that is 624565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich by (*FAIL) or (?!) 
set previous to NULL, which gives a "nothing to repeat" 624665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich error above. We can just ignore the repeat in JS case. */ 624765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 624865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (*previous == OP_FAIL) goto END_REPEAT; 624965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 625065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Else there's some kind of shambles */ 625165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 625265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 625365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 625465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR11; 625565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 625665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 625765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 625865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the character following a repeat is '+', possessive_quantifier is 625965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich TRUE. For some opcodes, there are special alternative opcodes for this 626065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case. For anything else, we wrap the entire repeated item inside OP_ONCE 626165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich brackets. Logically, the '+' notation is just syntactic sugar, taken from 626265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Sun's Java package, but the special opcodes can optimize it. 626365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 626465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Some (but not all) possessively repeated subpatterns have already been 626565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich completely handled in the code just above. 
For them, possessive_quantifier 626665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is always FALSE at this stage. Note that the repeated item starts at 626765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempcode, not at previous, which might be the first part of a string whose 626865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (former) last char we repeated. */ 626965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 627065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (possessive_quantifier) 627165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 627265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int len; 627365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 627465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Possessifying an EXACT quantifier has no effect, so we can ignore it. 627565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich However, QUERY, STAR, or UPTO may follow (for quantifiers such as {5,6}, 627665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich {5,}, or {5,10}). We skip over an EXACT item; if the length of what 627765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich remains is greater than zero, there's a further opcode that can be 627865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich handled. If not, do nothing, leaving the EXACT alone. */ 627965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 628065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(*tempcode) 628165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 628265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEEXACT: 628365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempcode += PRIV(OP_lengths)[*tempcode] + 628465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ((tempcode[1 + IMM2_SIZE] == OP_PROP 628565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 
2 : 0); 628665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 628765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 628865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* CHAR opcodes are used for exacts whose count is 1. */ 628965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 629065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHAR: 629165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHARI: 629265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOT: 629365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTI: 629465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXACT: 629565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXACTI: 629665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTEXACT: 629765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTEXACTI: 629865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempcode += PRIV(OP_lengths)[*tempcode]; 629965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF 630065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && HAS_EXTRALEN(tempcode[-1])) 630165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempcode += GET_EXTRALEN(tempcode[-1]); 630265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 630365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 630465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 630565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For the class opcodes, the repeat operator appears at the end; 630665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich adjust tempcode to point to it. 
*/ 630765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 630865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CLASS: 630965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NCLASS: 631065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempcode += 1 + 32/sizeof(pcre_uchar); 631165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 631265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 631365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 631465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_XCLASS: 631565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempcode += GET(tempcode, 1); 631665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 631765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 631865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 631965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 632065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If tempcode is equal to code (which points to the end of the repeated 632165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich item), it means we have skipped an EXACT item but there is no following 632265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich QUERY, STAR, or UPTO; the value of len will be 0, and we do nothing. In 632365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich all other cases, tempcode will be pointing to the repeat opcode, and will 632465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich be less than code, so the value of len will be greater than 0. 
*/ 632565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 632665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich len = (int)(code - tempcode); 632765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (len > 0) 632865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 632965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich unsigned int repcode = *tempcode; 633065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 633165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* There is a table for possessifying opcodes, all of which are less 633265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich than OP_CALLOUT. A zero entry means there is no possessified version. 633365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich */ 633465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 633565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0) 633665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *tempcode = opcode_possessify[repcode]; 633765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 633865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For opcode without a special possessified version, wrap the item in 633965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ONCE brackets. Because we are moving code along, we must ensure that any 634065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pending recursive references are updated. 
*/ 634165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 634265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 634365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 634465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = OP_END; 634565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm); 634665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len)); 634765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 1 + LINK_SIZE; 634865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich len += 1 + LINK_SIZE; 634965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempcode[0] = OP_ONCE; 635065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_KET; 635165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUTINC(code, 0, len); 635265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(tempcode, 1, len); 635365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 635465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 635565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 635665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef NEVER 635765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (len > 0) switch (*tempcode) 635865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 635965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_STAR: *tempcode = OP_POSSTAR; break; 636065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUS: *tempcode = OP_POSPLUS; break; 636165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_QUERY: *tempcode = OP_POSQUERY; break; 636265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_UPTO: *tempcode = OP_POSUPTO; break; 636365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 636465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_STARI: *tempcode = OP_POSSTARI; break; 
636565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUSI: *tempcode = OP_POSPLUSI; break; 636665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_QUERYI: *tempcode = OP_POSQUERYI; break; 636765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_UPTOI: *tempcode = OP_POSUPTOI; break; 636865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 636965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTSTAR: *tempcode = OP_NOTPOSSTAR; break; 637065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPLUS: *tempcode = OP_NOTPOSPLUS; break; 637165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break; 637265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break; 637365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 637465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTSTARI: *tempcode = OP_NOTPOSSTARI; break; 637565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTPLUSI: *tempcode = OP_NOTPOSPLUSI; break; 637665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTQUERYI: *tempcode = OP_NOTPOSQUERYI; break; 637765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_NOTUPTOI: *tempcode = OP_NOTPOSUPTOI; break; 637865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 637965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPESTAR: *tempcode = OP_TYPEPOSSTAR; break; 638065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEPLUS: *tempcode = OP_TYPEPOSPLUS; break; 638165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break; 638265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_TYPEUPTO: *tempcode = OP_TYPEPOSUPTO; break; 638365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 638465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRSTAR: *tempcode = OP_CRPOSSTAR; 
break; 638565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRPLUS: *tempcode = OP_CRPOSPLUS; break; 638665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRQUERY: *tempcode = OP_CRPOSQUERY; break; 638765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CRRANGE: *tempcode = OP_CRPOSRANGE; break; 638865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 638965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Because we are moving code along, we must ensure that any 639065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pending recursive references are updated. */ 639165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 639265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 639365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = OP_END; 639465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm); 639565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len)); 639665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 1 + LINK_SIZE; 639765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich len += 1 + LINK_SIZE; 639865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempcode[0] = OP_ONCE; 639965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_KET; 640065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUTINC(code, 0, len); 640165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(tempcode, 1, len); 640265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 640365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 640465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 640565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 640665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 640765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In all case we no longer have a previous item. 
We also set the 640865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich "follows varying string" flag for subsequently encountered reqchars if 640965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich it isn't already set and we have just passed a varying length item. */ 641065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 641165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich END_REPEAT: 641265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = NULL; 641365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->req_varyopt |= reqvary; 641465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 641565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 641665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 641765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ===================================================================*/ 641865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Start of nested parenthesized sub-expression, or comment or lookahead or 641965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich lookbehind or option setting or condition or all the other extended 642065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich parenthesis forms. */ 642165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 642265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_LEFT_PARENTHESIS: 642365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich newoptions = options; 642465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skipbytes = 0; 642565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bravalue = OP_CBRA; 642665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich save_hwm = cd->hwm; 642765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reset_bracount = FALSE; 642865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 642965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* First deal with various "verbs" that can be introduced by '*'. 
*/ 643065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 643165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 643265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':' 643365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich || (MAX_255(ptr[1]) && ((cd->ctypes[ptr[1]] & ctype_letter) != 0)))) 643465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 643565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int i, namelen; 643665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int arglen = 0; 643765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const char *vn = verbnames; 643865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *name = ptr + 1; 643965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *arg = NULL; 644065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = NULL; 644165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 644265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++; 644365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich namelen = (int)(ptr - name); 644465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 644565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* It appears that Perl allows any characters whatsoever, other than 644665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich a closing parenthesis, to appear in arguments, so we no longer insist on 644765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich letters, digits, and underscores. 
*/ 644865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 644965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr == CHAR_COLON) 645065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 645165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich arg = ++ptr; 645265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; 645365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich arglen = (int)(ptr - arg); 645465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((unsigned int)arglen > MAX_MARK) 645565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 645665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR75; 645765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 645865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 645965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 646065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 646165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr != CHAR_RIGHT_PARENTHESIS) 646265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 646365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR60; 646465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 646565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 646665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 646765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Scan the table of verb names */ 646865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 646965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (i = 0; i < verbcount; i++) 647065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 647165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (namelen == verbs[i].len && 647265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRNCMP_UC_C8(name, vn, namelen) == 0) 647365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 
647465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int setverb; 647565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 647665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check for open captures before ACCEPT and convert it to 647765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ASSERT_ACCEPT if in an assertion. */ 647865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 647965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (verbs[i].op == OP_ACCEPT) 648065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 648165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich open_capitem *oc; 648265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (arglen != 0) 648365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 648465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR59; 648565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 648665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 648765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->had_accept = TRUE; 648865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (oc = cd->open_caps; oc != NULL; oc = oc->next) 648965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 649065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_CLOSE; 649165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2INC(code, 0, oc->number); 649265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 649365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich setverb = *code++ = 649465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (cd->assert_depth > 0)? 
OP_ASSERT_ACCEPT : OP_ACCEPT; 649565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 649665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Do not set firstchar after *ACCEPT */ 649765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; 649865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 649965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 650065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle other cases with/without an argument */ 650165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 650265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (arglen == 0) 650365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 650465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (verbs[i].op < 0) /* Argument is mandatory */ 650565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 650665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR66; 650765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 650865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 650965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich setverb = *code++ = verbs[i].op; 651065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 651165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 651265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 651365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 651465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (verbs[i].op_arg < 0) /* Argument is forbidden */ 651565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 651665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR59; 651765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 651865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 651965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich setverb = *code++ = verbs[i].op_arg; 652065de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich *code++ = arglen; 652165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memcpy(code, arg, IN_UCHARS(arglen)); 652265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += arglen; 652365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = 0; 652465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 652565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 652665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch (setverb) 652765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 652865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_THEN: 652965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_THEN_ARG: 653065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->external_flags |= PCRE_HASTHEN; 653165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 653265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 653365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PRUNE: 653465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PRUNE_ARG: 653565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SKIP: 653665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SKIP_ARG: 653765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->had_pruneorskip = TRUE; 653865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 653965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 654065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 654165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; /* Found verb, exit loop */ 654265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 654365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 654465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich vn += verbs[i].len + 1; 654565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 654665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 654765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (i < verbcount) continue; /* Successfully 
handled a verb */ 654865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR60; /* Verb not recognized */ 654965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 655065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 655165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 655265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Deal with the extended parentheses; all are introduced by '?', and the 655365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich appearance of any of them means that this is not a capturing group. */ 655465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 655565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (*ptr == CHAR_QUESTION_MARK) 655665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 655765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int i, set, unset, namelen; 655865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int *optset; 655965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *name; 656065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *slot; 656165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 656265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch (*(++ptr)) 656365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 656465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_NUMBER_SIGN: /* Comment; skip to ket */ 656565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 656665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; 656765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr == CHAR_NULL) 656865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 656965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR18; 657065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 657165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 
657265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 657365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 657465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 657565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ------------------------------------------------------------ */ 657665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_VERTICAL_LINE: /* Reset capture count for each branch */ 657765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reset_bracount = TRUE; 657865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through */ 657965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 658065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ------------------------------------------------------------ */ 658165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_COLON: /* Non-capturing bracket */ 658265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bravalue = OP_BRA; 658365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 658465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 658565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 658665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 658765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ------------------------------------------------------------ */ 658865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_LEFT_PARENTHESIS: 658965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bravalue = OP_COND; /* Conditional group */ 659065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempptr = ptr; 659165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 659265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* A condition can be an assertion, a number (referring to a numbered 659365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich group's having been set), a name (referring to a named group), or 'R', 659465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich referring to 
recursion. R<digits> and R&name are also permitted for 659565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recursion tests. 659665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 659765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich There are ways of testing a named group: (?(name)) is used by Python; 659865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Perl 5.10 onwards uses (?(<name>) or (?('name')). 659965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 660065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich There is one unfortunate ambiguity, caused by history. 'R' can be the 660165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recursive thing or the name 'R' (and similarly for 'R' followed by 660265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich digits). We look for a name first; if not found, we try the other case. 660365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 660465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich For compatibility with auto-callouts, we allow a callout to be 660565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich specified before a condition that is an assertion. First, check for the 660665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich syntax of a callout; if found, adjust the temporary pointer that is 660765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich used to check for an assertion condition. That's all that is needed! 
*/ 660865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 660965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C) 661065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 661165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break; 661265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[i] == CHAR_RIGHT_PARENTHESIS) 661365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempptr += i + 1; 661465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 661565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 661665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For conditions that are assertions, check the syntax, and then exit 661765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the switch. This will take control down to where bracketed groups, 661865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich including assertions, are processed. */ 661965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 662065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (tempptr[1] == CHAR_QUESTION_MARK && 662165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (tempptr[2] == CHAR_EQUALS_SIGN || 662265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempptr[2] == CHAR_EXCLAMATION_MARK || 662365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempptr[2] == CHAR_LESS_THAN_SIGN)) 662465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 662565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 662665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all 662765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich need to skip at least 1+IMM2_SIZE bytes at the start of the group. 
*/ 662865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 662965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code[1+LINK_SIZE] = OP_CREF; 663065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skipbytes = 1+IMM2_SIZE; 663165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich refsign = -1; /* => not a number */ 663265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich namelen = -1; /* => not a name; must set to avoid warning */ 663365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich name = NULL; /* Always set to avoid warning */ 663465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno = 0; /* Always set to avoid warning */ 663565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 663665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check for a test for recursion in a named group. */ 663765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 663865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 663965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND) 664065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 664165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich terminator = -1; 664265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr += 2; 664365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code[1+LINK_SIZE] = OP_RREF; /* Change the type of test */ 664465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 664565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 664665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check for a test for a named group's having been set, using the Perl 664765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich syntax (?(<name>) or (?('name'), and also allow for the original PCRE 664865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). 
*/ 664965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 665065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (*ptr == CHAR_LESS_THAN_SIGN) 665165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 665265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich terminator = CHAR_GREATER_THAN_SIGN; 665365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 665465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 665565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (*ptr == CHAR_APOSTROPHE) 665665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 665765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich terminator = CHAR_APOSTROPHE; 665865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 665965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 666065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 666165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 666265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich terminator = CHAR_NULL; 666365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr == CHAR_MINUS || *ptr == CHAR_PLUS) refsign = *ptr++; 666465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (IS_DIGIT(*ptr)) refsign = 0; 666565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 666665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 666765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle a number */ 666865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 666965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (refsign >= 0) 667065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 667165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (IS_DIGIT(*ptr)) 667265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 667365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno = recno * 10 + (int)(*ptr - CHAR_0); 667465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 667565de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich } 667665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 667765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 667865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Otherwise we expect to read a name; anything else is an error. When 667965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich a name is one of a number of duplicates, a different opcode is used and 668065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich it needs more memory. Unfortunately we cannot tell whether a name is a 668165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich duplicate in the first pass, so we have to allow for more memory. */ 668265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 668365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 668465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 668565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (IS_DIGIT(*ptr)) 668665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 668765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR84; 668865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 668965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 669065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_word) == 0) 669165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 669265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR28; /* Assertion expected */ 669365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 669465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 669565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich name = ptr++; 669665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) 669765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 669865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 669965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 
670065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich namelen = (int)(ptr - name); 670165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr != NULL && (options & PCRE_DUPNAMES) != 0) 670265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *lengthptr += IMM2_SIZE; 670365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 670465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 670565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check the terminator */ 670665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 670765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) || 670865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *ptr++ != CHAR_RIGHT_PARENTHESIS) 670965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 671065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr--; /* Error offset */ 671165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR26; /* Malformed number or name */ 671265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 671365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 671465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 671565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Do no further checking in the pre-compile phase. */ 671665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 671765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr != NULL) break; 671865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 671965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In the real compile we do the work of looking for the actual 672065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reference. If refsign is not negative, it means we have a number in 672165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno. 
*/ 672265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 672365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (refsign >= 0) 672465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 672565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (recno <= 0) 672665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 672765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR35; 672865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 672965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 673065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (refsign != 0) recno = (refsign == CHAR_MINUS)? 673165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->bracount - recno + 1 : recno + cd->bracount; 673265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (recno <= 0 || recno > cd->final_bracount) 673365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 673465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR15; 673565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 673665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 673765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2(code, 2+LINK_SIZE, recno); 673865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 673965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 674065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 674165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Otherwise look for the name. 
*/ 674265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 674365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich slot = cd->name_table; 674465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (i = 0; i < cd->names_found; i++) 674565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 674665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0) break; 674765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich slot += cd->name_entry_size; 674865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 674965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 675065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Found the named subpattern. If the name is duplicated, add one to 675165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the opcode to change CREF/RREF into DNCREF/DNRREF and insert 675265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich appropriate data values. Otherwise, just insert the unique subpattern 675365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich number. 
*/ 675465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 675565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (i < cd->names_found) 675665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 675765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int offset = i++; 675865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int count = 1; 675965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno = GET2(slot, 0); /* Number from first found */ 676065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (; i < cd->names_found; i++) 676165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 676265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich slot += cd->name_entry_size; 676365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) != 0 || 676465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (slot+IMM2_SIZE)[namelen] != 0) break; 676565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich count++; 676665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 676765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 676865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (count > 1) 676965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 677065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2(code, 2+LINK_SIZE, offset); 677165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count); 677265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skipbytes += IMM2_SIZE; 677365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code[1+LINK_SIZE]++; 677465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 677565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else /* Not a duplicated name */ 677665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 677765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2(code, 2+LINK_SIZE, recno); 677865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 
677965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 678065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 678165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If terminator == CHAR_NULL it means that the name followed directly 678265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich after the opening parenthesis [e.g. (?(abc)...] and in this case there 678365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich are some further alternatives to try. For the cases where terminator != 678465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_NULL [things like (?(<name>... or (?('name')... or (?(R&name)... ] 678565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich we have now checked all the possibilities, so give an error. */ 678665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 678765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (terminator != CHAR_NULL) 678865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 678965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR15; 679065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 679165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 679265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 679365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check for (?(R) for recursion. Allow digits after R to specify a 679465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich specific group number. 
*/ 679565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 679665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (*name == CHAR_R) 679765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 679865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno = 0; 679965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (i = 1; i < namelen; i++) 680065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 680165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!IS_DIGIT(name[i])) 680265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 680365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR15; 680465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 680565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 680665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno = recno * 10 + name[i] - CHAR_0; 680765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 680865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (recno == 0) recno = RREF_ANY; 680965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code[1+LINK_SIZE] = OP_RREF; /* Change test type */ 681065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2(code, 2+LINK_SIZE, recno); 681165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 681265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 681365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Similarly, check for the (?(DEFINE) "condition", which is always 681465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich false. 
*/ 681565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 681665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (namelen == 6 && STRNCMP_UC_C8(name, STRING_DEFINE, 6) == 0) 681765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 681865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code[1+LINK_SIZE] = OP_DEF; 681965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skipbytes = 1; 682065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 682165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 682265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Reference to an unidentified subpattern. */ 682365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 682465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 682565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 682665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR15; 682765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 682865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 682965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 683065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 683165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 683265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ------------------------------------------------------------ */ 683365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_EQUALS_SIGN: /* Positive lookahead */ 683465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bravalue = OP_ASSERT; 683565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->assert_depth += 1; 683665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 683765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 683865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 683965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Optimize (?!) 
to (*FAIL) unless it is quantified - which is a weird 684065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich thing to do, but Perl allows all assertions to be quantified, and when 684165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich they contain capturing parentheses there may be a potential use for 684265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich this feature. Not that that applies to a quantified (?!) but we allow 684365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich it for uniformity. */ 684465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 684565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ------------------------------------------------------------ */ 684665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_EXCLAMATION_MARK: /* Negative lookahead */ 684765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 684865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr == CHAR_RIGHT_PARENTHESIS && ptr[1] != CHAR_ASTERISK && 684965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr[1] != CHAR_PLUS && ptr[1] != CHAR_QUESTION_MARK && 685065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (ptr[1] != CHAR_LEFT_CURLY_BRACKET || !is_counted_repeat(ptr+2))) 685165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 685265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_FAIL; 685365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = NULL; 685465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 685565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 685665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bravalue = OP_ASSERT_NOT; 685765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->assert_depth += 1; 685865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 685965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 686065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 686165de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich /* ------------------------------------------------------------ */ 686265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_LESS_THAN_SIGN: /* Lookbehind or named define */ 686365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch (ptr[1]) 686465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 686565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_EQUALS_SIGN: /* Positive lookbehind */ 686665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bravalue = OP_ASSERTBACK; 686765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->assert_depth += 1; 686865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr += 2; 686965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 687065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 687165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_EXCLAMATION_MARK: /* Negative lookbehind */ 687265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bravalue = OP_ASSERTBACK_NOT; 687365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->assert_depth += 1; 687465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr += 2; 687565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 687665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 687765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: /* Could be name define, else bad */ 687865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (MAX_255(ptr[1]) && (cd->ctypes[ptr[1]] & ctype_word) != 0) 687965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto DEFINE_NAME; 688065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; /* Correct offset for error */ 688165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR24; 688265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 688365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 688465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 
688565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 688665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 688765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ------------------------------------------------------------ */ 688865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_GREATER_THAN_SIGN: /* One-time brackets */ 688965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bravalue = OP_ONCE; 689065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 689165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 689265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 689365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 689465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ------------------------------------------------------------ */ 689565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_C: /* Callout - may be followed by digits; */ 689665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous_callout = code; /* Save for later completion */ 689765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich after_manual_callout = 1; /* Skip one item before completing */ 689865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_CALLOUT; 689965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 690065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int n = 0; 690165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 690265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while(IS_DIGIT(*ptr)) 690365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich n = n * 10 + *ptr++ - CHAR_0; 690465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr != CHAR_RIGHT_PARENTHESIS) 690565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 690665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR39; 690765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 690865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 
690965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (n > 255) 691065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 691165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR38; 691265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 691365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 691465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = n; 691565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(code, 0, (int)(ptr - cd->start_pattern + 1)); /* Pattern offset */ 691665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(code, LINK_SIZE, 0); /* Default length */ 691765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 2 * LINK_SIZE; 691865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 691965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = NULL; 692065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 692165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 692265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 692365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ------------------------------------------------------------ */ 692465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_P: /* Python-style named subpattern handling */ 692565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*(++ptr) == CHAR_EQUALS_SIGN || 692665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *ptr == CHAR_GREATER_THAN_SIGN) /* Reference or recursion */ 692765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 692865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is_recurse = *ptr == CHAR_GREATER_THAN_SIGN; 692965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich terminator = CHAR_RIGHT_PARENTHESIS; 693065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto NAMED_REF_OR_RECURSE; 693165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 693265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (*ptr 
!= CHAR_LESS_THAN_SIGN) /* Test for Python-style defn */ 693365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 693465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR41; 693565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 693665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 693765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through to handle (?P< as (?< is handled */ 693865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 693965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 694065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ------------------------------------------------------------ */ 694165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich DEFINE_NAME: /* Come here from (?< handling */ 694265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_APOSTROPHE: 694365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich terminator = (*ptr == CHAR_LESS_THAN_SIGN)? 694465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; 694565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich name = ++ptr; 694665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (IS_DIGIT(*ptr)) 694765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 694865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR84; /* Group name must start with non-digit */ 694965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 695065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 695165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++; 695265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich namelen = (int)(ptr - name); 695365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 695465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In the pre-compile phase, do a syntax check, remember the longest 
695565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich name, and then remember the group in a vector, expanding it if 695665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich necessary. Duplicates for the same number are skipped; other duplicates 695765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich are checked for validity. In the actual compile, there is nothing to 695865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do. */ 695965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 696065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr != NULL) 696165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 696265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich named_group *ng; 696365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 number = cd->bracount + 1; 696465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 696565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr != (pcre_uchar)terminator) 696665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 696765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR42; 696865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 696965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 697065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 697165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cd->names_found >= MAX_NAME_COUNT) 697265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 697365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR49; 697465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 697565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 697665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 697765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (namelen + IMM2_SIZE + 1 > cd->name_entry_size) 697865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 697965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->name_entry_size = 
namelen + IMM2_SIZE + 1; 698065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (namelen > MAX_NAME_SIZE) 698165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 698265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR48; 698365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 698465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 698565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 698665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 698765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Scan the list to check for duplicates. For duplicate names, if the 698865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich number is the same, break the loop, which causes the name to be 698965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich discarded; otherwise, if DUPNAMES is not set, give an error. 699065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich If it is set, allow the name with a different number, but continue 699165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich scanning in case this is a duplicate with the same number. For 699265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich non-duplicate names, give an error if the number is duplicated. 
*/ 699365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 699465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ng = cd->named_groups; 699565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (i = 0; i < cd->names_found; i++, ng++) 699665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 699765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (namelen == ng->length && 699865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRNCMP_UC_UC(name, ng->name, namelen) == 0) 699965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 700065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ng->number == number) break; 700165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_DUPNAMES) == 0) 700265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 700365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR43; 700465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 700565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 700665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->dupnames = TRUE; /* Duplicate names exist */ 700765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 700865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (ng->number == number) 700965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 701065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR65; 701165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 701265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 701365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 701465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 701565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (i >= cd->names_found) /* Not a duplicate with same number */ 701665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 701765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Increase the list size if necessary */ 
701865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 701965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cd->names_found >= cd->named_group_list_size) 702065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 702165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int newsize = cd->named_group_list_size * 2; 702265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich named_group *newspace = (PUBL(malloc)) 702365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (newsize * sizeof(named_group)); 702465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 702565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (newspace == NULL) 702665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 702765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR21; 702865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 702965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 703065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 703165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memcpy(newspace, cd->named_groups, 703265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->named_group_list_size * sizeof(named_group)); 703365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE) 703465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (PUBL(free))((void *)cd->named_groups); 703565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->named_groups = newspace; 703665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->named_group_list_size = newsize; 703765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 703865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 703965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->named_groups[cd->names_found].name = name; 704065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->named_groups[cd->names_found].length = namelen; 704165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
cd->named_groups[cd->names_found].number = number; 704265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->names_found++; 704365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 704465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 704565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 704665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; /* Move past > or ' in both passes. */ 704765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto NUMBERED_GROUP; 704865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 704965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 705065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ------------------------------------------------------------ */ 705165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_AMPERSAND: /* Perl recursion/subroutine syntax */ 705265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich terminator = CHAR_RIGHT_PARENTHESIS; 705365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is_recurse = TRUE; 705465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through */ 705565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 705665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* We come here from the Python syntax above that handles both 705765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich references (?P=name) and recursion (?P>name), as well as falling 705865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich through from the Perl recursion syntax (?&name). We also come here from 705965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the Perl \k<name> or \k'name' back reference syntax and the \k{name} 706065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich .NET syntax, and the Oniguruma \g<...> and \g'...' subroutine syntax. 
*/ 706165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 706265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NAMED_REF_OR_RECURSE: 706365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich name = ++ptr; 706465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (IS_DIGIT(*ptr)) 706565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 706665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR84; /* Group name must start with non-digit */ 706765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 706865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 706965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++; 707065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich namelen = (int)(ptr - name); 707165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 707265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In the pre-compile phase, do a syntax check. We used to just set 707365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich a dummy reference number, because it was not used in the first pass. 707465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich However, with the change of recursive back references to be atomic, 707565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich we have to look for the number so that this state can be identified, as 707665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich otherwise the incorrect length is computed. If it's not a backwards 707765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reference, the dummy number will do. 
*/ 707865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 707965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr != NULL) 708065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 708165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich named_group *ng; 708265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 708365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (namelen == 0) 708465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 708565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR62; 708665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 708765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 708865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr != (pcre_uchar)terminator) 708965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 709065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR42; 709165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 709265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 709365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (namelen > MAX_NAME_SIZE) 709465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 709565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR48; 709665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 709765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 709865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 709965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* The name table does not exist in the first pass; instead we must 710065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich scan the list of names encountered so far in order to get the 710165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich number. If the name is not found, set the value to 0 for a forward 710265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reference. 
*/ 710365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 710465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ng = cd->named_groups; 710565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (i = 0; i < cd->names_found; i++, ng++) 710665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 710765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (namelen == ng->length && 710865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich STRNCMP_UC_UC(name, ng->name, namelen) == 0) 710965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 711065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 711165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno = (i < cd->names_found)? ng->number : 0; 711265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 711365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Count named back references. */ 711465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 711565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!is_recurse) cd->namedrefcount++; 711665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 711765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If duplicate names are permitted, we have to allow for a named 711865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reference to a duplicated name (this cannot be determined until the 711965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich second pass). This needs an extra 16-bit data item. */ 712065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 712165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((options & PCRE_DUPNAMES) != 0) *lengthptr += IMM2_SIZE; 712265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 712365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 712465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In the real compile, search the name table. 
We check the name 712565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich first, and then check that we have reached the end of the name in the 712665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich table. That way, if the name is longer than any in the table, the 712765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich comparison will fail without reading beyond the table entry. */ 712865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 712965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 713065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 713165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich slot = cd->name_table; 713265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (i = 0; i < cd->names_found; i++) 713365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 713465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 && 713565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich slot[IMM2_SIZE+namelen] == 0) 713665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 713765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich slot += cd->name_entry_size; 713865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 713965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 714065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (i < cd->names_found) 714165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 714265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno = GET2(slot, 0); 714365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 714465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 714565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 714665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR15; 714765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 714865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 714965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 
715065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 715165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In both phases, for recursions, we can now go to the code than 715265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich handles numerical recursion. */ 715365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 715465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (is_recurse) goto HANDLE_RECURSION; 715565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 715665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In the second pass we must see if the name is duplicated. If so, we 715765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich generate a different opcode. */ 715865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 715965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr == NULL && cd->dupnames) 716065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 716165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int count = 1; 716265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich unsigned int index = i; 716365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *cslot = slot + cd->name_entry_size; 716465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 716565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (i++; i < cd->names_found; i++) 716665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 716765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break; 716865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 716965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 717065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich count++; 717165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cslot += cd->name_entry_size; 717265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 717365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 717465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (count 
> 1) 717565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 717665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; 717765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = code; 717865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF; 717965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2INC(code, 0, index); 718065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2INC(code, 0, count); 718165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 718265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Process each potentially referenced group. */ 718365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 718465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (; slot < cslot; slot += cd->name_entry_size) 718565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 718665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich open_capitem *oc; 718765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno = GET2(slot, 0); 718865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->backref_map |= (recno < 32)? (1 << recno) : 1; 718965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (recno > cd->top_backref) cd->top_backref = recno; 719065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 719165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check to see if this back reference is recursive, that it, it 719265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is inside the group that it references. A flag is set so that the 719365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich group can be made atomic. 
*/ 719465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 719565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (oc = cd->open_caps; oc != NULL; oc = oc->next) 719665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 719765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (oc->number == recno) 719865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 719965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich oc->flag = TRUE; 720065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 720165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 720265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 720365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 720465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 720565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; /* End of back ref handling */ 720665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 720765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 720865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 720965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* First pass, or a non-duplicated name. 
*/ 721065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 721165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto HANDLE_REFERENCE; 721265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 721365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 721465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ------------------------------------------------------------ */ 721565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_R: /* Recursion */ 721665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; /* Same as (?0) */ 721765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through */ 721865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 721965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 722065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ------------------------------------------------------------ */ 722165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_MINUS: case CHAR_PLUS: /* Recursion or subroutine */ 722265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: 722365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: 722465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 722565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *called; 722665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich terminator = CHAR_RIGHT_PARENTHESIS; 722765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 722865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Come here from the \g<...> and \g'...' code (Oniguruma 722965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich compatibility). However, the syntax has been checked to ensure that 723065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the ... 
are a (signed) number, so that neither ERR63 nor ERR29 will 723165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich be called on this path, nor with the jump to OTHER_CHAR_AFTER_QUERY 723265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ever be taken. */ 723365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 723465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich HANDLE_NUMERICAL_RECURSION: 723565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 723665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((refsign = *ptr) == CHAR_PLUS) 723765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 723865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 723965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!IS_DIGIT(*ptr)) 724065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 724165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR63; 724265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 724365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 724465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 724565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (refsign == CHAR_MINUS) 724665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 724765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!IS_DIGIT(ptr[1])) 724865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto OTHER_CHAR_AFTER_QUERY; 724965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 725065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 725165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 725265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno = 0; 725365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while(IS_DIGIT(*ptr)) 725465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno = recno * 10 + *ptr++ - CHAR_0; 725565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 725665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr != 
(pcre_uchar)terminator) 725765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 725865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR29; 725965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 726065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 726165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 726265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (refsign == CHAR_MINUS) 726365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 726465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (recno == 0) 726565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 726665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR58; 726765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 726865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 726965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno = cd->bracount - recno + 1; 727065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (recno <= 0) 727165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 727265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR15; 727365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 727465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 727565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 727665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (refsign == CHAR_PLUS) 727765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 727865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (recno == 0) 727965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 728065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR58; 728165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 728265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 728365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno += cd->bracount; 728465de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich } 728565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 728665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Come here from code above that handles a named recursion */ 728765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 728865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich HANDLE_RECURSION: 728965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 729065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = code; 729165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich called = cd->start_code; 729265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 729365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* When we are actually compiling, find the bracket that is being 729465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich referenced. Temporarily end the regex in case it doesn't exist before 729565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich this point. If we end up with a forward reference, first check that 729665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the bracket does occur later so we can give the error (and position) 729765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich now. Then remember this forward reference in the workspace so it can 729865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich be filled in at the end. 
*/ 729965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 730065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr == NULL) 730165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 730265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = OP_END; 730365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (recno != 0) 730465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich called = PRIV(find_bracket)(cd->start_code, utf, recno); 730565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 730665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Forward reference */ 730765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 730865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (called == NULL) 730965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 731065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (recno > cd->final_bracount) 731165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 731265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR15; 731365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 731465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 731565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 731665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fudge the value of "called" so that when it is inserted as an 731765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich offset below, what it actually inserted is the reference number 731865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich of the group. Then remember the forward reference. 
*/ 731965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 732065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich called = cd->start_code + recno; 732165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cd->hwm >= cd->start_workspace + cd->workspace_size - 732265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich WORK_SIZE_SAFETY_MARGIN) 732365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 732465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = expand_workspace(cd); 732565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*errorcodeptr != 0) goto FAILED; 732665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 732765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code)); 732865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 732965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 733065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If not a forward reference, and the subpattern is still open, 733165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich this is a recursive call. We check to see if this is a left 733265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recursion that could loop for ever, and diagnose that case. We 733365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich must not, however, do this check if we are in a conditional 733465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich subpattern because the condition might be testing for recursion in 733565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich a pattern such as /(?(R)a+|(?R)b)/, which is perfectly valid. 733665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Forever loops are also detected at runtime, so those that occur in 733765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich conditional subpatterns will be picked up then. 
*/ 733865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 733965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (GET(called, 1) == 0 && cond_depth <= 0 && 734065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich could_be_empty(called, code, bcptr, utf, cd)) 734165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 734265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR40; 734365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 734465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 734565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 734665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 734765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Insert the recursion/subroutine item. It does not have a set first 734865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich character (relevant if it is repeated, because it will then be 734965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich wrapped with ONCE brackets). 
*/ 735065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 735165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = OP_RECURSE; 735265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(code, 1, (int)(called - cd->start_code)); 735365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 1 + LINK_SIZE; 735465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich groupsetfirstchar = FALSE; 735565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 735665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 735765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Can't determine a first byte now */ 735865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 735965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; 736065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 736165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 736265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 736365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ------------------------------------------------------------ */ 736465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: /* Other characters: check option setting */ 736565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OTHER_CHAR_AFTER_QUERY: 736665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich set = unset = 0; 736765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich optset = &set; 736865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 736965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON) 737065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 737165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch (*ptr++) 737265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 737365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_MINUS: optset = &unset; break; 737465de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich 737565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_J: /* Record that it changed in the external options */ 737665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *optset |= PCRE_DUPNAMES; 737765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->external_flags |= PCRE_JCHANGED; 737865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 737965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 738065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_i: *optset |= PCRE_CASELESS; break; 738165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_m: *optset |= PCRE_MULTILINE; break; 738265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_s: *optset |= PCRE_DOTALL; break; 738365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_x: *optset |= PCRE_EXTENDED; break; 738465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_U: *optset |= PCRE_UNGREEDY; break; 738565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_X: *optset |= PCRE_EXTRA; break; 738665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 738765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: *errorcodeptr = ERR12; 738865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr--; /* Correct the offset */ 738965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 739065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 739165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 739265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 739365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Set up the changed option bits, but don't change anything yet. 
*/ 739465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 739565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich newoptions = (options | set) & (~unset); 739665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 739765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the options ended with ')' this is not the start of a nested 739865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich group with option changes, so the options change at this level. If this 739965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich item is right at the start of the pattern, the options can be 740065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich abstracted and made external in the pre-compile phase, and ignored in 740165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the compile phase. This can be helpful when matching -- for instance in 740265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich caseless checking of required bytes. 740365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 740465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich If the code pointer is not (cd->start_code + 1 + LINK_SIZE), we are 740565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich definitely *not* at the start of the pattern because something has been 740665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich compiled. In the pre-compile phase, however, the code pointer can have 740765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich that value after the start, because it gets reset as code is discarded 740865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich during the pre-compile. However, this can happen only at top level - if 740965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich we are within parentheses, the starting BRA will still be present. 
At 741065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich any parenthesis level, the length value can be used to test if anything 741165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich has been compiled at that level. Thus, a test for both these conditions 741265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is necessary to ensure we correctly detect the start of the pattern in 741365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich both phases. 741465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 741565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich If we are not at the pattern start, reset the greedy defaults and the 741665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case value for firstchar and reqchar. */ 741765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 741865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr == CHAR_RIGHT_PARENTHESIS) 741965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 742065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code == cd->start_code + 1 + LINK_SIZE && 742165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE)) 742265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 742365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->external_options = newoptions; 742465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 742565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 742665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 742765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich greedy_default = ((newoptions & PCRE_UNGREEDY) != 0); 742865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich greedy_non_default = greedy_default ^ 1; 742965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? 
REQ_CASELESS:0; 743065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 743165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 743265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Change options at this level, and pass them back for use 743365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich in subsequent branches. */ 743465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 743565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *optionsptr = options = newoptions; 743665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = NULL; /* This item can't be repeated */ 743765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; /* It is complete */ 743865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 743965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 744065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the options ended with ':' we are heading into a nested group 744165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich with possible change of options. Such groups are non-capturing and are 744265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich not assertions of any kind. All we need to do is skip over the ':'; 744365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the newoptions value is handled below. */ 744465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 744565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bravalue = OP_BRA; 744665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 744765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } /* End of switch for character following (? */ 744865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } /* End of (? handling */ 744965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 745065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Opening parenthesis not followed by '*' or '?'. 
If PCRE_NO_AUTO_CAPTURE 745165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is set, all unadorned brackets become non-capturing and behave like (?:...) 745265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich brackets. */ 745365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 745465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if ((options & PCRE_NO_AUTO_CAPTURE) != 0) 745565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 745665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bravalue = OP_BRA; 745765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 745865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 745965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Else we have a capturing group. */ 746065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 746165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 746265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 746365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NUMBERED_GROUP: 746465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->bracount += 1; 746565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2(code, 1+LINK_SIZE, cd->bracount); 746665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skipbytes = IMM2_SIZE; 746765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 746865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 746965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Process nested bracketed regex. First check for parentheses nested too 747065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich deeply. 
*/ 747165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 747265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((cd->parens_depth += 1) > PARENS_NEST_LIMIT) 747365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 747465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR82; 747565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 747665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 747765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 747865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Assertions used not to be repeatable, but this was changed for Perl 747965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich compatibility, so all kinds can now be repeated. We copy code into a 748065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich non-register variable (tempcode) in order to be able to pass its address 748165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich because some compilers complain otherwise. */ 748265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 748365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = code; /* For handling repetition */ 748465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = bravalue; 748565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempcode = code; 748665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempreqvary = cd->req_varyopt; /* Save value before bracket */ 748765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempbracount = cd->bracount; /* Save value before bracket */ 748865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich length_prevgroup = 0; /* Initialize for pre-compile phase */ 748965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 749065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!compile_regex( 749165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich newoptions, /* The complete new option state */ 749265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich &tempcode, /* 
Where to put code (updated) */ 749365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich &ptr, /* Input pointer (updated) */ 749465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcodeptr, /* Where to put an error message */ 749565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (bravalue == OP_ASSERTBACK || 749665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ 749765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reset_bracount, /* True if (?| group */ 749865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skipbytes, /* Skip over bracket number */ 749965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cond_depth + 750065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ((bravalue == OP_COND)?1:0), /* Depth of condition subpatterns */ 750165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich &subfirstchar, /* For possible first char */ 750265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich &subfirstcharflags, 750365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich &subreqchar, /* For possible last char */ 750465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich &subreqcharflags, 750565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bcptr, /* Current branch chain */ 750665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd, /* Tables block */ 750765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (lengthptr == NULL)? 
NULL : /* Actual compile phase */ 750865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich &length_prevgroup /* Pre-compile phase */ 750965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich )) 751065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 751165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 751265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->parens_depth -= 1; 751365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 751465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If this was an atomic group and there are no capturing groups within it, 751565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich generate OP_ONCE_NC instead of OP_ONCE. */ 751665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 751765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (bravalue == OP_ONCE && cd->bracount <= tempbracount) 751865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = OP_ONCE_NC; 751965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 752065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT) 752165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->assert_depth -= 1; 752265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 752365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* At the end of compiling, code is still pointing to the start of the 752465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich group, while tempcode has been updated to point past the end of the group. 752565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich The pattern pointer (ptr) is on the bracket. 752665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 752765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich If this is a conditional bracket, check that there are no more than 752865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich two branches in the group, or just one if it's a DEFINE group. 
We do this 752965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich in the real compile phase, not in the pre-pass, where the whole group may 753065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich not be available. */ 753165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 753265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (bravalue == OP_COND && lengthptr == NULL) 753365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 753465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *tc = code; 753565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int condcount = 0; 753665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 753765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do { 753865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich condcount++; 753965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tc += GET(tc,1); 754065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 754165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (*tc != OP_KET); 754265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 754365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* A DEFINE group is never obeyed inline (the "condition" is always 754465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich false). It must have only one branch. 
*/ 754565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 754665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (code[LINK_SIZE+1] == OP_DEF) 754765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 754865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (condcount > 1) 754965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 755065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR54; 755165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 755265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 755365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bravalue = OP_DEF; /* Just a flag to suppress char handling below */ 755465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 755565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 755665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* A "normal" conditional group. If there is just one branch, we must not 755765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich make use of its firstchar or reqchar, because this is equivalent to an 755865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich empty second branch. 
*/ 755965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 756065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 756165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 756265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (condcount > 2) 756365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 756465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR27; 756565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 756665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 756765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (condcount == 1) subfirstcharflags = subreqcharflags = REQ_NONE; 756865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 756965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 757065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 757165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Error if hit end of pattern */ 757265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 757365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr != CHAR_RIGHT_PARENTHESIS) 757465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 757565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR14; 757665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 757765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 757865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 757965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In the pre-compile phase, update the length by the length of the group, 758065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich less the brackets at either end. 
Then reduce the compiled code to just a 758165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich set of non-capturing brackets so that it doesn't use much memory if it is 758265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich duplicated by a quantifier.*/ 758365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 758465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr != NULL) 758565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 758665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE) 758765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 758865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR20; 758965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 759065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 759165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE; 759265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code++; /* This already contains bravalue */ 759365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUTINC(code, 0, 1 + LINK_SIZE); 759465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_KET; 759565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUTINC(code, 0, 1 + LINK_SIZE); 759665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; /* No need to waste time with special character handling */ 759765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 759865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 759965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Otherwise update the main code pointer to the end of the group. 
*/ 760065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 760165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code = tempcode; 760265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 760365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For a DEFINE group, required and first character settings are not 760465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich relevant. */ 760565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 760665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (bravalue == OP_DEF) break; 760765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 760865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle updating of the required and first characters for other types of 760965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich group. Update for normal brackets of all kinds, and conditions with two 761065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branches (see code above). If the bracket is followed by a quantifier with 761165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zero repeat, we have to back off. Hence the definition of zeroreqchar and 761265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstchar outside the main loop so that they can be accessed for the 761365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich back off. 
*/ 761465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 761565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zeroreqchar = reqchar; 761665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zeroreqcharflags = reqcharflags; 761765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstchar = firstchar; 761865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstcharflags = firstcharflags; 761965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich groupsetfirstchar = FALSE; 762065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 762165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (bravalue >= OP_ONCE) 762265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 762365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If we have not yet set a firstchar in this branch, take it from the 762465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich subpattern, remembering that it was set here so that a repeat of more 762565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich than one can replicate it as reqchar if necessary. If the subpattern has 762665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich no firstchar, set "none" for the whole branch. In both cases, a zero 762765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat forces firstchar to "none". 
*/ 762865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 762965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags == REQ_UNSET) 763065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 763165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (subfirstcharflags >= 0) 763265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 763365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstchar = subfirstchar; 763465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstcharflags = subfirstcharflags; 763565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich groupsetfirstchar = TRUE; 763665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 763765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else firstcharflags = REQ_NONE; 763865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstcharflags = REQ_NONE; 763965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 764065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 764165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If firstchar was previously set, convert the subpattern's firstchar 764265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich into reqchar if there wasn't one, using the vary flag that was in 764365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich existence beforehand. 
*/ 764465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 764565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (subfirstcharflags >= 0 && subreqcharflags < 0) 764665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 764765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich subreqchar = subfirstchar; 764865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich subreqcharflags = subfirstcharflags | tempreqvary; 764965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 765065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 765165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the subpattern set a required byte (or set a first byte that isn't 765265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich really the first byte - see above), set it. */ 765365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 765465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (subreqcharflags >= 0) 765565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 765665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqchar = subreqchar; 765765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharflags = subreqcharflags; 765865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 765965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 766065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 766165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For a forward assertion, we take the reqchar, if set. This can be 766265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich helpful if the pattern that follows the assertion doesn't set a different 766365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich char. For example, it's useful for /(?=abcde).+/. 
We can't set firstchar 766465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for an assertion, however because it leads to incorrect effect for patterns 766565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich such as /(?=a)a.+/ when the "real" "a" would then become a reqchar instead 766665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich of a firstchar. This is overcome by a scan at the end if there's no 766765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstchar, looking for an asserted first char. */ 766865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 766965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (bravalue == OP_ASSERT && subreqcharflags >= 0) 767065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 767165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqchar = subreqchar; 767265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharflags = subreqcharflags; 767365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 767465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; /* End of processing '(' */ 767565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 767665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 767765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ===================================================================*/ 767865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle metasequences introduced by \. For ones like \d, the ESC_ values 767965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich are arranged to be the negation of the corresponding OP_values in the 768065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default case when PCRE_UCP is not set. For the back references, the values 768165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich are negative the reference number. Only back references and those types 768265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich that consume a character may be repeated. 
We can test for values between 768365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ESC_b and ESC_Z for the latter; this may have to change if any new ones are 768465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ever created. */ 768565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 768665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case CHAR_BACKSLASH: 768765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tempptr = ptr; 768865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options, FALSE); 768965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*errorcodeptr != 0) goto FAILED; 769065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 769165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (escape == 0) /* The escape coded a single character */ 769265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = ec; 769365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 769465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 769565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (escape == ESC_Q) /* Handle start of quoted string */ 769665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 769765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) 769865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr += 2; /* avoid empty string */ 769965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else inescq = TRUE; 770065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 770165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 770265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 770365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (escape == ESC_E) continue; /* Perl ignores an orphan \E */ 770465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 770565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For metasequences that actually match a character, 
we disable the 770665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich setting of a first character if it hasn't already been set. */ 770765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 770865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags == REQ_UNSET && escape > ESC_b && escape < ESC_Z) 770965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstcharflags = REQ_NONE; 771065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 771165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Set values to reset to if this is followed by a zero repeat. */ 771265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 771365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstchar = firstchar; 771465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstcharflags = firstcharflags; 771565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zeroreqchar = reqchar; 771665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zeroreqcharflags = reqcharflags; 771765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 771865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n' 771965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is a subroutine call by number (Oniguruma syntax). In fact, the value 772065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ESC_g is returned only for these cases. So we don't need to check for < 772165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich or ' if the value is ESC_g. For the Perl syntax \g{n} the value is 772265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich -n, and for the Perl syntax \g{name} the result is ESC_k (as 772365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich that is a synonym for a named back reference). 
*/ 772465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 772565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (escape == ESC_g) 772665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 772765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *p; 772865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 cf; 772965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 773065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich save_hwm = cd->hwm; /* Normally this is set when '(' is read */ 773165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? 773265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; 773365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 773465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* These two statements stop the compiler from warning about possibly 773565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In 773665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich fact, because we do the check for a number below, the paths that 773765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich would actually be in error are never taken. */ 773865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 773965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skipbytes = 0; 774065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reset_bracount = FALSE; 774165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 774265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If it's not a signed or unsigned number, treat it as a name.
*/ 774365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 774465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cf = ptr[1]; 774565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cf != CHAR_PLUS && cf != CHAR_MINUS && !IS_DIGIT(cf)) 774665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 774765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is_recurse = TRUE; 774865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto NAMED_REF_OR_RECURSE; 774965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 775065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 775165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Signed or unsigned number (cf = ptr[1]) is known to be plus or minus 775265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich or a digit. */ 775365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 775465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich p = ptr + 2; 775565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (IS_DIGIT(*p)) p++; 775665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*p != (pcre_uchar)terminator) 775765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 775865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR57; 775965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 776065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 776165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 776265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto HANDLE_NUMERICAL_RECURSION; 776365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 776465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 776565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* \k<name> or \k'name' is a back reference by name (Perl syntax). 776665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich We also support \k{name} (.NET syntax). 
*/ 776765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 776865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (escape == ESC_k) 776965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 777065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((ptr[1] != CHAR_LESS_THAN_SIGN && 777165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET)) 777265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 777365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR69; 777465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 777565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 777665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is_recurse = FALSE; 777765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? 777865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)? 777965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET; 778065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto NAMED_REF_OR_RECURSE; 778165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 778265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 778365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Back references are handled specially; must disable firstchar if 778465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich not set to cope with cases like (?=(\w+))\1: which would otherwise set 778565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ':' later. 
*/ 778665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 778765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (escape < 0) 778865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 778965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich open_capitem *oc; 779065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno = -escape; 779165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 779265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Come here from named backref handling when the reference is to a 779365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich single group (i.e. not to a duplicated name). */ 779465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 779565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich HANDLE_REFERENCE: 779665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; 779765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = code; 779865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF; 779965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT2INC(code, 0, recno); 780065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->backref_map |= (recno < 32)? (1 << recno) : 1; 780165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (recno > cd->top_backref) cd->top_backref = recno; 780265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 780365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check to see if this back reference is recursive, that is, it 780465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is inside the group that it references. A flag is set so that the 780565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich group can be made atomic.
*/ 780665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 780765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (oc = cd->open_caps; oc != NULL; oc = oc->next) 780865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 780965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (oc->number == recno) 781065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 781165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich oc->flag = TRUE; 781265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 781365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 781465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 781565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 781665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 781765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* So are Unicode property matches, if supported. */ 781865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 781965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 782065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (escape == ESC_P || escape == ESC_p) 782165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 782265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL negated; 782365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich unsigned int ptype = 0, pdata = 0; 782465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr)) 782565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 782665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = code; 782765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = ((escape == ESC_p) != negated)? 
OP_PROP : OP_NOTPROP; 782865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = ptype; 782965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = pdata; 783065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 783165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else 783265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 783365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If Unicode properties are not supported, \X, \P, and \p are not 783465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich allowed. */ 783565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 783665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (escape == ESC_X || escape == ESC_P || escape == ESC_p) 783765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 783865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR45; 783965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto FAILED; 784065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 784165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 784265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 784365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For the rest (including \X when Unicode properties are supported), we 784465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich can obtain the OP value by negating the escape value in the default 784565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich situation when PCRE_UCP is not set. When it *is* set, we substitute 784665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Unicode property tests. Note that \b and \B do a one-character 784765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich lookbehind, and \A also behaves as if it does. 
*/ 784865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 784965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 785065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 785165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && 785265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->max_lookbehind == 0) 785365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->max_lookbehind = 1; 785465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 785565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (escape >= ESC_DU && escape <= ESC_wu) 785665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 785765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich nestptr = ptr + 1; /* Where to resume */ 785865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr = substitutes[escape - ESC_DU] - 1; /* Just before substitute */ 785965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 786065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 786165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 786265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE 786365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich so that it works in DFA mode and in lookbehinds. */ 786465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 786565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 786665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = (escape > ESC_b && escape < ESC_Z)? code : NULL; 786765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = (!utf && escape == ESC_C)? 
OP_ALLANY : escape; 786865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 786965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 787065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 787165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 787265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 787365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* We have a data character whose value is in c. In UTF-8 mode it may have 787465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich a value > 127. We set its representation in the length/buffer, and then 787565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich handle it as a data character. */ 787665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 787765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 787865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) 787965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich mclength = PRIV(ord2utf)(c, mcbuffer); 788065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 788165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 788265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 788365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 788465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich mcbuffer[0] = c; 788565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich mclength = 1; 788665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 788765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto ONE_CHAR; 788865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 788965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 789065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* ===================================================================*/ 789165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Handle a literal character. 
It is guaranteed not to be whitespace or # 789265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich when the extended flag is set. If we are in a UTF mode, it may be a 789365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich multi-unit literal character. */ 789465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 789565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 789665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich NORMAL_CHAR: 789765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich mclength = 1; 789865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich mcbuffer[0] = c; 789965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 790065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF 790165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && HAS_EXTRALEN(c)) 790265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr)); 790365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 790465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 790565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* At this point we have the character's bytes in mcbuffer, and the length 790665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich in mclength. When not in UTF-8 mode, the length is always 1. */ 790765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 790865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ONE_CHAR: 790965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous = code; 791065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 791165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For caseless UTF-8 mode when UCP support is available, check whether 791265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich this character has more than one other case. If so, generate a special 791365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich OP_PROP item instead of OP_CHARI. 
*/ 791465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 791565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UCP 791665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf && (options & PCRE_CASELESS) != 0) 791765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 791865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich GETCHAR(c, mcbuffer); 791965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((c = UCD_CASESET(c)) != 0) 792065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 792165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_PROP; 792265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = PT_CLIST; 792365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = c; 792465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags == REQ_UNSET) 792565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstcharflags = zerofirstcharflags = REQ_NONE; 792665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 792765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 792865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 792965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 793065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 793165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Caseful matches, or not one of the multicase characters. */ 793265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 793365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = ((options & PCRE_CASELESS) != 0)? 
OP_CHARI : OP_CHAR; 793465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (c = 0; c < mclength; c++) *code++ = mcbuffer[c]; 793565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 793665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Remember if \r or \n were seen */ 793765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 793865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL) 793965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->external_flags |= PCRE_HASCRORLF; 794065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 794165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Set the first and required bytes appropriately. If no previous first 794265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich byte, set it from this character, but revert to none on a zero repeat. 794365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Otherwise, leave the firstchar value alone, and don't change it on a zero 794465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich repeat. */ 794565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 794665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags == REQ_UNSET) 794765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 794865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstcharflags = REQ_NONE; 794965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zeroreqchar = reqchar; 795065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zeroreqcharflags = reqcharflags; 795165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 795265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If the character is more than one byte long, we can set firstchar 795365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich only if it is not to be matched caselessly. 
*/ 795465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 795565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (mclength == 1 || req_caseopt == 0) 795665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 795765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstchar = mcbuffer[0] | req_caseopt; 795865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstchar = mcbuffer[0]; 795965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstcharflags = req_caseopt; 796065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 796165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (mclength != 1) 796265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 796365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqchar = code[-1]; 796465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharflags = cd->req_varyopt; 796565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 796665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 796765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else firstcharflags = reqcharflags = REQ_NONE; 796865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 796965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 797065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* firstchar was previously set; we can set reqchar only if the length is 797165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 1 or the matching is caseful. 
*/ 797265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 797365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 797465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 797565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstchar = firstchar; 797665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zerofirstcharflags = firstcharflags; 797765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zeroreqchar = reqchar; 797865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zeroreqcharflags = reqcharflags; 797965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (mclength == 1 || req_caseopt == 0) 798065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 798165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqchar = code[-1]; 798265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharflags = req_caseopt | cd->req_varyopt; 798365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 798465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 798565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 798665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; /* End of literal character handling */ 798765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 798865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } /* end of big loop */ 798965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 799065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 799165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Control never reaches here by falling through, only by a goto for all the 799265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevicherror states. Pass back the position in the pattern so that it can be displayed 799365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichto the user for diagnosing the error. 
*/ 799465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 799565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichFAILED: 799665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*ptrptr = ptr; 799765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn FALSE; 799865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 799965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 800065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 800165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 800265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 800365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Compile sequence of alternatives * 800465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 800565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 800665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* On entry, ptr is pointing past the bracket character, but on return it 800765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpoints to the closing bracket, or vertical bar, or end of string. The code 800865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvariable is pointing at the byte into which the BRA operator has been stored. 800965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThis function is used during the pre-compile phase when we are trying to find 801065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichout the amount of memory needed, as well as during the real compile phase. The 801165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvalue of lengthptr distinguishes the two phases. 
801265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 801365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 801465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich options option bits, including any changes for this subpattern 801565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich codeptr -> the address of the current code pointer 801665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptrptr -> the address of the current pattern pointer 801765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcodeptr -> pointer to error code variable 801865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich lookbehind TRUE if this is a lookbehind assertion 801965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reset_bracount TRUE to reset the count for each branch 802065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skipbytes skip this many bytes at start (for brackets and OP_COND) 802165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cond_depth depth of nesting for conditional subpatterns 802265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstcharptr place to put the first required character 802365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstcharflagsptr place to put the first character flags, or a negative number 802465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharptr place to put the last required character 802565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharflagsptr place to put the last required character flags, or a negative number 802665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bcptr pointer to the chain of currently open branches 802765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd points to the data block with tables pointers etc. 
802865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich lengthptr NULL during the real compile phase 802965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich points to length accumulator during pre-compile phase 803065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 803165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: TRUE on success 803265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 803365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 803465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL 803565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcompile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr, 803665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes, 803765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int cond_depth, 803865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 *firstcharptr, pcre_int32 *firstcharflagsptr, 803965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 *reqcharptr, pcre_int32 *reqcharflagsptr, 804065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branch_chain *bcptr, compile_data *cd, int *lengthptr) 804165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 804265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *ptr = *ptrptr; 804365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *code = *codeptr; 804465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *last_branch = code; 804565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *start_bracket = code; 804665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *reverse_count = NULL; 804765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichopen_capitem capitem; 804865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint capnumber = 0; 804965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 firstchar, reqchar; 
805065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_int32 firstcharflags, reqcharflags; 805165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 branchfirstchar, branchreqchar; 805265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_int32 branchfirstcharflags, branchreqcharflags; 805365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint length; 805465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichunsigned int orig_bracount; 805565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichunsigned int max_bracount; 805665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbranch_chain bc; 805765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 805865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If set, call the external function that checks for stack availability. */ 805965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 806065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (PUBL(stack_guard) != NULL && PUBL(stack_guard)()) 806165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 806265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr= ERR85; 806365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 806465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 806565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 806665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Miscellaneous initialization */ 806765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 806865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbc.outer = bcptr; 806965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbc.current_branch = code; 807065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 807165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfirstchar = reqchar = 0; 807265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfirstcharflags = reqcharflags = REQ_UNSET; 807365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 807465de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich/* Accumulate the length for use in the pre-compile phase. Start with the 807565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlength of the BRA and KET and any extra bytes that are required at the 807665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbeginning. We accumulate in a local variable to save frequent testing of 807765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlengthptr for NULL. We cannot do this by looking at the value of code at the 807865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstart and end of each alternative, because compiled items are discarded during 807965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe pre-compile phase so that the work space is not exceeded. */ 808065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 808165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlength = 2 + 2*LINK_SIZE + skipbytes; 808265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 808365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* WARNING: If the above line is changed for any reason, you must also change 808465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe code that abstracts option settings at the start of the pattern and makes 808565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthem global. It tests the value of length for (2 + 2*LINK_SIZE) in the 808665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpre-compile phase to find out whether anything has yet been compiled or not. */ 808765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 808865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If this is a capturing subpattern, add to the chain of open capturing items 808965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichso that we can detect them if (*ACCEPT) is encountered. This is also used to 809065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdetect groups that contain recursive back references to themselves.
Note that 809165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichonly OP_CBRA need be tested here; changing this opcode to one of its variants, 809265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kraleviche.g. OP_SCBRAPOS, happens later, after the group has been compiled. */ 809365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 809465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (*code == OP_CBRA) 809565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 809665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich capnumber = GET2(code, 1 + LINK_SIZE); 809765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich capitem.number = capnumber; 809865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich capitem.next = cd->open_caps; 809965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich capitem.flag = FALSE; 810065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->open_caps = &capitem; 810165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 810265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 810365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Offset is set zero to mark that this bracket is still open */ 810465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 810565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPUT(code, 1, 0); 810665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcode += 1 + LINK_SIZE + skipbytes; 810765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 810865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Loop for each alternative branch */ 810965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 811065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichorig_bracount = max_bracount = cd->bracount; 811165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (;;) 811265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 811365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* For a (?| group, reset the capturing bracket count so that each branch 
811465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich uses the same numbers. */ 811565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 811665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (reset_bracount) cd->bracount = orig_bracount; 811765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 811865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Set up dummy OP_REVERSE if lookbehind assertion */ 811965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 812065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lookbehind) 812165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 812265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code++ = OP_REVERSE; 812365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reverse_count = code; 812465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUTINC(code, 0, 0); 812565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich length += 1 + LINK_SIZE; 812665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 812765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 812865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Now compile the branch; in the pre-compile phase its length gets added 812965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich into the length. */ 813065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 813165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar, 813265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich &branchfirstcharflags, &branchreqchar, &branchreqcharflags, &bc, 813365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cond_depth, cd, (lengthptr == NULL)? 
NULL : &length)) 813465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 813565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *ptrptr = ptr; 813665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 813765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 813865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 813965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Keep the highest bracket count in case (?| was used and some branch 814065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich has fewer than the rest. */ 814165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 814265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cd->bracount > max_bracount) max_bracount = cd->bracount; 814365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 814465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* In the real compile phase, there is some post-processing to be done. */ 814565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 814665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr == NULL) 814765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 814865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If this is the first branch, the firstchar and reqchar values for the 814965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branch become the values for the regex. 
*/ 815065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 815165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*last_branch != OP_ALT) 815265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 815365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstchar = branchfirstchar; 815465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstcharflags = branchfirstcharflags; 815565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqchar = branchreqchar; 815665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharflags = branchreqcharflags; 815765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 815865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 815965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If this is not the first branch, the first char and reqchar have to 816065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich match the values from all the previous branches, except that if the 816165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previous value for reqchar didn't have REQ_VARY set, it can still match, 816265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich and we set REQ_VARY for the regex. */ 816365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 816465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 816565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 816665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If we previously had a firstchar, but it doesn't match the new branch, 816765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich we have to abandon the firstchar for the regex, but if there was 816865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich previously no reqchar, it takes on the value of the old firstchar. 
*/ 816965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 817065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags >= 0 && 817165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (firstcharflags != branchfirstcharflags || firstchar != branchfirstchar)) 817265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 817365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (reqcharflags < 0) 817465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 817565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqchar = firstchar; 817665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharflags = firstcharflags; 817765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 817865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstcharflags = REQ_NONE; 817965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 818065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 818165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If we (now or from before) have no firstchar, a firstchar from the 818265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branch becomes a reqchar if there isn't a branch reqchar. 
*/ 818365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 818465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags < 0 && branchfirstcharflags >= 0 && branchreqcharflags < 0) 818565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 818665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branchreqchar = branchfirstchar; 818765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branchreqcharflags = branchfirstcharflags; 818865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 818965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 819065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Now ensure that the reqchars match */ 819165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 819265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (((reqcharflags & ~REQ_VARY) != (branchreqcharflags & ~REQ_VARY)) || 819365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqchar != branchreqchar) 819465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharflags = REQ_NONE; 819565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 819665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 819765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqchar = branchreqchar; 819865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharflags |= branchreqcharflags; /* To "or" REQ_VARY */ 819965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 820065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 820165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 820265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If lookbehind, check that this branch matches a fixed-length string, and 820365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich put the length into the OP_REVERSE item. Temporarily mark the end of the 820465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branch with OP_END. 
If the branch contains OP_RECURSE, the result is -3 820565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich because there may be forward references that we can't check here. Set a 820665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich flag to cause another lookbehind check at the end. Why not do it all at the 820765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich end? Because common, erroneous checks are picked up here and the offset of 820865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the problem can be shown. */ 820965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 821065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lookbehind) 821165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 821265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int fixed_length; 821365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = OP_END; 821465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich fixed_length = find_fixedlength(last_branch, (options & PCRE_UTF8) != 0, 821565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich FALSE, cd); 821665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich DPRINTF(("fixed length = %d\n", fixed_length)); 821765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (fixed_length == -3) 821865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 821965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->check_lookbehind = TRUE; 822065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 822165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (fixed_length < 0) 822265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 822365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = (fixed_length == -2)? ERR36 : 822465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (fixed_length == -4)? 
ERR70: ERR25; 822565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *ptrptr = ptr; 822665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 822765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 822865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 822965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 823065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (fixed_length > cd->max_lookbehind) 823165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->max_lookbehind = fixed_length; 823265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(reverse_count, 0, fixed_length); 823365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 823465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 823565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 823665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 823765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Reached end of expression, either ')' or end of pattern. In the real 823865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich compile phase, go back through the alternative branches and reverse the chain 823965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich of offsets, with the field in the BRA item now becoming an offset to the 824065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich first alternative. If there are no alternatives, it points to the end of the 824165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich group. The length in the terminating ket is always the length of the whole 824265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bracketed item. Return leaving the pointer at the terminating char. 
*/ 824365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 824465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*ptr != CHAR_VERTICAL_LINE) 824565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 824665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr == NULL) 824765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 824865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int branch_length = (int)(code - last_branch); 824965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do 825065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 825165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int prev_length = GET(last_branch, 1); 825265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(last_branch, 1, branch_length); 825365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich branch_length = prev_length; 825465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich last_branch -= branch_length; 825565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 825665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (branch_length > 0); 825765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 825865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 825965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fill in the ket */ 826065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 826165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = OP_KET; 826265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(code, 1, (int)(code - start_bracket)); 826365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 1 + LINK_SIZE; 826465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 826565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If it was a capturing subpattern, check to see if it contained any 826665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recursive back references. If so, we must wrap it in atomic brackets. 
826765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich Because we are moving code along, we must ensure that any pending recursive 826865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich references are updated. In any event, remove the block from the chain. */ 826965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 827065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (capnumber > 0) 827165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 827265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cd->open_caps->flag) 827365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 827465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = OP_END; 827565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich adjust_recurse(start_bracket, 1 + LINK_SIZE, 827665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (options & PCRE_UTF8) != 0, cd, cd->hwm); 827765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memmove(start_bracket + 1 + LINK_SIZE, start_bracket, 827865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich IN_UCHARS(code - start_bracket)); 827965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *start_bracket = OP_ONCE; 828065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 1 + LINK_SIZE; 828165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(start_bracket, 1, (int)(code - start_bracket)); 828265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = OP_KET; 828365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(code, 1, (int)(code - start_bracket)); 828465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 1 + LINK_SIZE; 828565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich length += 2 + 2*LINK_SIZE; 828665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 828765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->open_caps = cd->open_caps->next; 828865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 828965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
829065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Retain the highest bracket number, in case resetting was used. */ 829165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 829265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->bracount = max_bracount; 829365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 829465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Set values to pass back */ 829565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 829665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *codeptr = code; 829765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *ptrptr = ptr; 829865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *firstcharptr = firstchar; 829965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *firstcharflagsptr = firstcharflags; 830065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *reqcharptr = reqchar; 830165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *reqcharflagsptr = reqcharflags; 830265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr != NULL) 830365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 830465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (OFLOW_MAX - *lengthptr < length) 830565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 830665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorcodeptr = ERR20; 830765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 830865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 830965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *lengthptr += length; 831065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 831165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return TRUE; 831265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 831365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 831465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Another branch follows. 
In the pre-compile phase, we can move the code 831565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pointer back to where it was for the start of the first branch. (That is, 831665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pretend that each branch is the only one.) 831765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 831865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich In the real compile phase, insert an ALT node. Its length field points back 831965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich to the previous branch while the bracket remains open. At the end the chain 832065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich is reversed. It's done like this so that the start of the bracket has a 832165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich zero offset until it is closed, making it possible to detect recursion. */ 832265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 832365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (lengthptr != NULL) 832465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 832565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code = *codeptr + 1 + LINK_SIZE + skipbytes; 832665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich length += 1 + LINK_SIZE; 832765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 832865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 832965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 833065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code = OP_ALT; 833165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(code, 1, (int)(code - last_branch)); 833265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bc.current_branch = last_branch = code; 833365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += 1 + LINK_SIZE; 833465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 833565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 833665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr++; 
833765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 833865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Control never reaches here */ 833965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 834065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 834165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 834265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 834365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 834465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 834565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Check for anchored expression * 834665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 834765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 834865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Try to find out if this is an anchored regular expression. Consider each 834965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichalternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket 835065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichall of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then 835165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichit's anchored. However, if this is a multiline pattern, then only OP_SOD will 835265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbe found, because ^ generates OP_CIRCM in that mode. 835365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 835465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichWe can also consider a regex to be anchored if OP_SOM starts all its branches. 835565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichThis is the code for \G, which means "match at start of match position, taking 835665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichinto account the match offset". 
835765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 835865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichA branch is also implicitly anchored if it starts with .* and DOTALL is set, 835965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause that will try the rest of the pattern at all possible matching points, 836065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichso there is no point trying again.... er .... 836165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 836265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich.... except when the .* appears inside capturing parentheses, and there is a 836365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsubsequent back reference to those parentheses. We haven't enough information 836465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichto catch that case precisely. 836565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 836665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichAt first, the best we could do was to detect when .* was in capturing brackets 836765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichand the highest back reference was greater than or equal to that level. 836865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichHowever, by keeping a bitmap of the first 31 back references, we can catch some 836965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichof the more common cases more precisely. 837065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 837165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich... A second exception is when the .* appears inside an atomic group, because 837265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthis prevents the number of characters it matches from being adjusted. 
837365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 837465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 837565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code points to start of expression (the bracket) 837665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bracket_map a bitmap of which brackets we are inside while testing; this 837765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich handles up to substring 31; after that we just have to take 837865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the less precise approach 837965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd points to the compile data block 838065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich atomcount atomic group level 838165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 838265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: TRUE or FALSE 838365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 838465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 838565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL 838665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichis_anchored(register const pcre_uchar *code, unsigned int bracket_map, 838765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich compile_data *cd, int atomcount) 838865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 838965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdo { 839065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *scode = first_significant_code( 839165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code + PRIV(OP_lengths)[*code], FALSE); 839265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich register int op = *scode; 839365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 839465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Non-capturing brackets */ 839565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 839665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (op == 
OP_BRA || op == OP_BRAPOS || 839765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich op == OP_SBRA || op == OP_SBRAPOS) 839865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 839965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE; 840065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 840165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 840265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Capturing brackets */ 840365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 840465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (op == OP_CBRA || op == OP_CBRAPOS || 840565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich op == OP_SCBRA || op == OP_SCBRAPOS) 840665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 840765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int n = GET2(scode, 1+LINK_SIZE); 840865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int new_map = bracket_map | ((n < 32)? 
(1 << n) : 1); 840965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!is_anchored(scode, new_map, cd, atomcount)) return FALSE; 841065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 841165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 841265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Positive forward assertions and conditions */ 841365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 841465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (op == OP_ASSERT || op == OP_COND) 841565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 841665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!is_anchored(scode, bracket_map, cd, atomcount)) return FALSE; 841765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 841865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 841965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Atomic groups */ 842065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 842165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (op == OP_ONCE || op == OP_ONCE_NC) 842265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 842365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!is_anchored(scode, bracket_map, cd, atomcount + 1)) 842465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 842565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 842665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 842765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and 842865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich it isn't in brackets that are or may be referenced or inside an atomic 842965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich group. 
*/ 843065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 843165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR || 843265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich op == OP_TYPEPOSSTAR)) 843365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 843465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (scode[1] != OP_ALLANY || (bracket_map & cd->backref_map) != 0 || 843565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich atomcount > 0 || cd->had_pruneorskip) 843665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 843765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 843865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 843965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check for explicit anchoring */ 844065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 844165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE; 844265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 844365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += GET(code, 1); 844465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 844565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (*code == OP_ALT); /* Loop for each alternative */ 844665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn TRUE; 844765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 844865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 844965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 845065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 845165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 845265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Check for starting with ^ or .* * 845365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 
845465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 845565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This is called to find out if every branch starts with ^ or .* so that 845665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich"first char" processing can be done to speed things up in multiline 845765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmatching and for non-DOTALL patterns that start with .* (which must start at 845865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe beginning or after \n). As in the case of is_anchored() (see above), we 845965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichhave to take account of back references to capturing brackets that contain .* 846065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause in that case we can't make the assumption. Also, the appearance of .* 846165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichinside atomic brackets or in a pattern that contains *PRUNE or *SKIP does not 846265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcount, because once again the assumption no longer holds. 
846365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 846465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 846565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code points to start of expression (the bracket) 846665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich bracket_map a bitmap of which brackets we are inside while testing; this 846765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich handles up to substring 31; after that we just have to take 846865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich the less precise approach 846965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd points to the compile data 847065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich atomcount atomic group level 847165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 847265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: TRUE or FALSE 847365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 847465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 847565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic BOOL 847665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichis_startline(const pcre_uchar *code, unsigned int bracket_map, 847765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich compile_data *cd, int atomcount) 847865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 847965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdo { 848065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *scode = first_significant_code( 848165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code + PRIV(OP_lengths)[*code], FALSE); 848265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich register int op = *scode; 848365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 848465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* If we are at the start of a conditional assertion group, *both* the 848565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich conditional assertion *and* what follows 
the condition must satisfy the test 848665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for start of line. Other kinds of condition fail. Note that there may be an 848765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich auto-callout at the start of a condition. */ 848865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 848965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (op == OP_COND) 849065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 849165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich scode += 1 + LINK_SIZE; 849265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT]; 849365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch (*scode) 849465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 849565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CREF: 849665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DNCREF: 849765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_RREF: 849865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DNRREF: 849965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_DEF: 850065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 850165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 850265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: /* Assertion */ 850365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE; 850465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich do scode += GET(scode, 1); while (*scode == OP_ALT); 850565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich scode += 1 + LINK_SIZE; 850665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 850765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 850865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich scode = first_significant_code(scode, FALSE); 
850965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich op = *scode; 851065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 851165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 851265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Non-capturing brackets */ 851365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 851465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (op == OP_BRA || op == OP_BRAPOS || 851565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich op == OP_SBRA || op == OP_SBRAPOS) 851665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 851765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE; 851865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 851965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 852065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Capturing brackets */ 852165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 852265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (op == OP_CBRA || op == OP_CBRAPOS || 852365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich op == OP_SCBRA || op == OP_SCBRAPOS) 852465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 852565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int n = GET2(scode, 1+LINK_SIZE); 852665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int new_map = bracket_map | ((n < 32)? 
(1 << n) : 1); 852765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!is_startline(scode, new_map, cd, atomcount)) return FALSE; 852865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 852965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 853065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Positive forward assertions */ 853165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 853265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (op == OP_ASSERT) 853365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 853465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!is_startline(scode, bracket_map, cd, atomcount)) return FALSE; 853565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 853665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 853765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Atomic brackets */ 853865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 853965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (op == OP_ONCE || op == OP_ONCE_NC) 854065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 854165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!is_startline(scode, bracket_map, cd, atomcount + 1)) return FALSE; 854265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 854365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 854465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* .* means "start at start or after \n" if it isn't in atomic brackets or 854565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich brackets that may be referenced, as long as the pattern does not contain 854665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *PRUNE or *SKIP, because these break the feature. Consider, for example, 854765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e. not at the 854865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich start of a line. 
*/ 854965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 855065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR) 855165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 855265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (scode[1] != OP_ANY || (bracket_map & cd->backref_map) != 0 || 855365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich atomcount > 0 || cd->had_pruneorskip) 855465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return FALSE; 855565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 855665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 855765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Check for explicit circumflex; anything else gives a FALSE result. Note 855865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich in particular that this includes atomic brackets OP_ONCE and OP_ONCE_NC 855965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich because the number of characters matched by .* cannot be adjusted inside 856065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich them. 
*/ 856165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 856265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (op != OP_CIRC && op != OP_CIRCM) return FALSE; 856365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 856465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Move on to the next alternative */ 856565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 856665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += GET(code, 1); 856765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 856865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (*code == OP_ALT); /* Loop for each alternative */ 856965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn TRUE; 857065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 857165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 857265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 857365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 857465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 857565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Check for asserted fixed first char * 857665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 857765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 857865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* During compilation, the "first char" settings from forward assertions are 857965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdiscarded, because they can cause conflicts with actual literals that follow. 858065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichHowever, if we end up without a first char setting for an unanchored pattern, 858165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichit is worth scanning the regex to see if there is an initial asserted first 858265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichchar. 
If all branches start with the same asserted char, or with a 858365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnon-conditional bracket all of whose alternatives start with the same asserted 858465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichchar (recurse ad lib), then we return that char, with the flags set to zero or 858565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichREQ_CASELESS; otherwise return zero with REQ_NONE in the flags. 858665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 858765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 858865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code points to start of expression (the bracket) 858965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich flags points to the first char flags, or to REQ_NONE 859065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich inassert TRUE if in an assertion 859165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 859265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: the fixed first char, or 0 with REQ_NONE in flags 859365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 859465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 859565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic pcre_uint32 859665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfind_firstassertedchar(const pcre_uchar *code, pcre_int32 *flags, 859765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich BOOL inassert) 859865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 859965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregister pcre_uint32 c = 0; 860065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint cflags = REQ_NONE; 860165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 860265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*flags = REQ_NONE; 860365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdo { 860465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 d; 
860565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int dflags; 860665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int xl = (*code == OP_CBRA || *code == OP_SCBRA || 860765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0; 860865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl, 860965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich TRUE); 861065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich register pcre_uchar op = *scode; 861165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 861265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich switch(op) 861365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 861465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: 861565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return 0; 861665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 861765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_BRA: 861865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_BRAPOS: 861965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CBRA: 862065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SCBRA: 862165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CBRAPOS: 862265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_SCBRAPOS: 862365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ASSERT: 862465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ONCE: 862565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_ONCE_NC: 862665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT); 862765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (dflags < 0) 862865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return 0; 862965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cflags < 0) { c = d; 
cflags = dflags; } else if (c != d || cflags != dflags) return 0; 863065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 863165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 863265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXACT: 863365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich scode += IMM2_SIZE; 863465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through */ 863565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 863665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHAR: 863765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUS: 863865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINPLUS: 863965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSPLUS: 864065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!inassert) return 0; 864165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cflags < 0) { c = scode[1]; cflags = 0; } 864265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (c != scode[1]) return 0; 864365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 864465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 864565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_EXACTI: 864665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich scode += IMM2_SIZE; 864765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Fall through */ 864865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 864965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_CHARI: 865065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_PLUSI: 865165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_MINPLUSI: 865265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case OP_POSPLUSI: 865365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (!inassert) return 0; 865465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; } 
865565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (c != scode[1]) return 0; 865665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 865765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 865865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 865965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich code += GET(code, 1); 866065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 866165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (*code == OP_ALT); 866265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 866365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*flags = cflags; 866465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn c; 866565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 866665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 866765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 866865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 866965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 867065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Add an entry to the name/number table * 867165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 867265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 867365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function is called between compiling passes to add an entry to the 867465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichname/number table, maintaining alphabetical order. Checking for permitted 867565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichand forbidden duplicates has already been done. 
867665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 867765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 867865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd the compile data block 867965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich name the name to add 868065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich length the length of the name 868165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich groupno the group number 868265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 868365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: nothing 868465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 868565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 868665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstatic void 868765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichadd_name(compile_data *cd, const pcre_uchar *name, int length, 868865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich unsigned int groupno) 868965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 869065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint i; 869165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *slot = cd->name_table; 869265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 869365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfor (i = 0; i < cd->names_found; i++) 869465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 869565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(length)); 869665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (crc == 0 && slot[IMM2_SIZE+length] != 0) 869765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich crc = -1; /* Current name is a substring */ 869865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 869965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Make space in the table and break the loop for an earlier name. 
For a 870065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich duplicate or later name, carry on. We do this for duplicates so that in the 870165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich simple case (when ?(| is not used) they are in order of their numbers. In all 870265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cases they are in the order in which they appear in the pattern. */ 870365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 870465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (crc < 0) 870565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 870665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich memmove(slot + cd->name_entry_size, slot, 870765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich IN_UCHARS((cd->names_found - i) * cd->name_entry_size)); 870865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 870965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 871065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 871165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Continue the loop for a later or duplicate name */ 871265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 871365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich slot += cd->name_entry_size; 871465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 871565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 871665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPUT2(slot, 0, groupno); 871765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichmemcpy(slot + IMM2_SIZE, name, IN_UCHARS(length)); 871865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichslot[IMM2_SIZE + length] = 0; 871965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->names_found++; 872065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 872165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 872265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 872365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 
872465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/************************************************* 872565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich* Compile a Regular Expression * 872665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*************************************************/ 872765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 872865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This function takes a string and returns a pointer to a block of store 872965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichholding a compiled version of the expression. The original API for this 873065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfunction had no error code return variable; it is retained for backwards 873165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcompatibility. The new function is given a new name. 873265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 873365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichArguments: 873465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pattern the regular expression 873565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich options various option bits 873665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcodeptr pointer to error code variable (pcre_compile2() only) 873765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich can be NULL if you don't want a code value 873865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorptr pointer to pointer to error text 873965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich erroroffset ptr offset in pattern where error was detected 874065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich tables pointer to character tables or NULL 874165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 874265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichReturns: pointer to compiled data block, or NULL on error, 874365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich with errorptr and erroroffset set 
874465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 874565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 874665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8 874765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION 874865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_compile(const char *pattern, int options, const char **errorptr, 874965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int *erroroffset, const unsigned char *tables) 875065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16 875165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION 875265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr, 875365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int *erroroffset, const unsigned char *tables) 875465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32 875565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION 875665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre32_compile(PCRE_SPTR32 pattern, int options, const char **errorptr, 875765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int *erroroffset, const unsigned char *tables) 875865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 875965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 876065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8 876165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables); 876265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16 876365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables); 
876465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32 876565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn pcre32_compile2(pattern, options, NULL, errorptr, erroroffset, tables); 876665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 876765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 876865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 876965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 877065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8 877165de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION 877265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_compile2(const char *pattern, int options, int *errorcodeptr, 877365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const char **errorptr, int *erroroffset, const unsigned char *tables) 877465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16 877565de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION 877665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr, 877765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const char **errorptr, int *erroroffset, const unsigned char *tables) 877865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32 877965de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE_EXP_DEFN pcre32 * PCRE_CALL_CONVENTION 878065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre32_compile2(PCRE_SPTR32 pattern, int options, int *errorcodeptr, 878165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const char **errorptr, int *erroroffset, const unsigned char *tables) 878265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 878365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich{ 878465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichREAL_PCRE *re; 
878565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint length = 1; /* For final END opcode */ 878665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_int32 firstcharflags, reqcharflags; 878765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 firstchar, reqchar; 878865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 limit_match = PCRE_UINT32_MAX; 878965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uint32 limit_recursion = PCRE_UINT32_MAX; 879065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint newline; 879165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint errorcode = 0; 879265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichint skipatstart = 0; 879365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL utf; 879465de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichBOOL never_utf = FALSE; 879565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsize_t size; 879665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar *code; 879765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *codestart; 879865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichconst pcre_uchar *ptr; 879965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcompile_data compile_block; 880065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcompile_data *cd = &compile_block; 880165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 880265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This space is used for "compiling" into during the first phase, when we are 880365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcomputing the amount of memory that is needed. Compiled items are thrown away 880465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichas soon as possible, so that a fairly large buffer should be sufficient for 880565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthis purpose. 
The same space is used in the second phase for remembering where 880665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichto fill in forward references to subpatterns. That may overflow, in which case 880765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnew memory is obtained from malloc(). */ 880865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 880965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_uchar cworkspace[COMPILE_WORK_SIZE]; 881065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 881165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This vector is used for remembering name groups during the pre-compile. In a 881265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsimilar way to cworkspace, it can be expanded using malloc() if necessary. */ 881365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 881465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnamed_group named_groups[NAMED_GROUP_LIST_SIZE]; 881565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 881665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Set this early so that early errors get offset 0. */ 881765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 881865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichptr = (const pcre_uchar *)pattern; 881965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 882065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* We can't pass back an error message if errorptr is NULL; I guess the best we 882165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcan do is just return NULL, but we can set a code value if there is a code 882265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpointer. 
*/ 882365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 882465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (errorptr == NULL) 882565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 882665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (errorcodeptr != NULL) *errorcodeptr = 99; 882765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return NULL; 882865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 882965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 883065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*errorptr = NULL; 883165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (errorcodeptr != NULL) *errorcodeptr = ERR0; 883265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 883365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* However, we can give a message for this error */ 883465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 883565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (erroroffset == NULL) 883665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 883765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcode = ERR16; 883865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto PCRE_EARLY_ERROR_RETURN2; 883965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 884065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 884165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*erroroffset = 0; 884265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 884365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Set up pointers to the individual character tables */ 884465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 884565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (tables == NULL) tables = PRIV(default_tables); 884665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->lcc = tables + lcc_offset; 884765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->fcc = tables + fcc_offset; 884865de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevichcd->cbits = tables + cbits_offset; 884965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->ctypes = tables + ctypes_offset; 885065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 885165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Check that all undefined public option bits are zero */ 885265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 885365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((options & ~PUBLIC_COMPILE_OPTIONS) != 0) 885465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 885565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcode = ERR17; 885665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto PCRE_EARLY_ERROR_RETURN; 885765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 885865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 885965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If PCRE_NEVER_UTF is set, remember it. */ 886065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 886165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE; 886265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 886365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Check for global one-time settings at the start of the pattern, and remember 886465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe offset for later. 
*/ 886565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 886665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->external_flags = 0; /* Initialize here for LIMIT_MATCH/RECURSION */ 886765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 886865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && 886965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ptr[skipatstart+1] == CHAR_ASTERISK) 887065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 887165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int newnl = 0; 887265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int newbsr = 0; 887365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 887465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* For completeness and backward compatibility, (*UTFn) is supported in the 887565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrelevant libraries, but (*UTF) is generic and always supported. Note that 887665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. 
*/ 887765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 887865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE8 887965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF8_RIGHTPAR, 5) == 0) 888065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { skipatstart += 7; options |= PCRE_UTF8; continue; } 888165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 888265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE16 888365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF16_RIGHTPAR, 6) == 0) 888465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { skipatstart += 8; options |= PCRE_UTF16; continue; } 888565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 888665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE32 888765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF32_RIGHTPAR, 6) == 0) 888865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { skipatstart += 8; options |= PCRE_UTF32; continue; } 888965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 889065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 889165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 4) == 0) 889265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { skipatstart += 6; options |= PCRE_UTF8; continue; } 889365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0) 889465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { skipatstart += 6; options |= PCRE_UCP; continue; } 889565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_AUTO_POSSESS_RIGHTPAR, 16) == 0) 889665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { skipatstart += 18; options 
|= PCRE_NO_AUTO_POSSESS; continue; } 889765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0) 889865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; } 889965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 890065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_MATCH_EQ, 12) == 0) 890165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 890265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 c = 0; 890365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int p = skipatstart + 14; 890465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (isdigit(ptr[p])) 890565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 890665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c > PCRE_UINT32_MAX / 10 - 1) break; /* Integer overflow */ 890765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = c*10 + ptr[p++] - CHAR_0; 890865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 890965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break; 891065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c < limit_match) 891165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 891265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich limit_match = c; 891365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->external_flags |= PCRE_MLSET; 891465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 891565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skipatstart = p; 891665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 891765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 891865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 891965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (STRNCMP_UC_C8(ptr+skipatstart+2, 
STRING_LIMIT_RECURSION_EQ, 16) == 0) 892065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 892165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uint32 c = 0; 892265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int p = skipatstart + 18; 892365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (isdigit(ptr[p])) 892465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 892565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c > PCRE_UINT32_MAX / 10 - 1) break; /* Integer overflow check */ 892665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich c = c*10 + ptr[p++] - CHAR_0; 892765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 892865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break; 892965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (c < limit_recursion) 893065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 893165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich limit_recursion = c; 893265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->external_flags |= PCRE_RLSET; 893365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 893465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich skipatstart = p; 893565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich continue; 893665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 893765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 893865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0) 893965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { skipatstart += 5; newnl = PCRE_NEWLINE_CR; } 894065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3) == 0) 894165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { skipatstart += 5; newnl = PCRE_NEWLINE_LF; } 894265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if 
(STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CRLF_RIGHTPAR, 5) == 0) 894365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; } 894465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANY_RIGHTPAR, 4) == 0) 894565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; } 894665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANYCRLF_RIGHTPAR, 8) == 0) 894765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; } 894865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 894965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0) 895065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; } 895165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_UNICODE_RIGHTPAR, 12) == 0) 895265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; } 895365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 895465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (newnl != 0) 895565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich options = (options & ~PCRE_NEWLINE_BITS) | newnl; 895665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (newbsr != 0) 895765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr; 895865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else break; 895965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 896065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 896165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* PCRE_UTF(16|32) have the same value as PCRE_UTF8. 
*/ 896265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichutf = (options & PCRE_UTF8) != 0; 896365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (utf && never_utf) 896465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 896565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcode = ERR78; 896665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto PCRE_EARLY_ERROR_RETURN2; 896765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 896865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 896965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Can't support UTF unless PCRE has been compiled to include the code. The 897065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn of an error code from PRIV(valid_utf)() is a new feature, introduced in 897165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichrelease 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is 897265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnot used here. 
*/ 897365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 897465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_UTF 897565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (utf && (options & PCRE_NO_UTF8_CHECK) == 0 && 897665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0) 897765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 897865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8 897965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcode = ERR44; 898065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16 898165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcode = ERR74; 898265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32 898365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcode = ERR77; 898465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 898565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto PCRE_EARLY_ERROR_RETURN2; 898665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 898765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else 898865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (utf) 898965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 899065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcode = ERR32; 899165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto PCRE_EARLY_ERROR_RETURN; 899265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 899365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 899465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 899565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Can't support UCP unless PCRE has been compiled to include the code. 
*/ 899665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 899765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef SUPPORT_UCP 899865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((options & PCRE_UCP) != 0) 899965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 900065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcode = ERR67; 900165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto PCRE_EARLY_ERROR_RETURN; 900265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 900365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 900465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 900565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Check validity of \R options. */ 900665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 900765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 900865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) 900965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 901065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcode = ERR56; 901165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto PCRE_EARLY_ERROR_RETURN; 901265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 901365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 901465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Handle different types of newline. The three bits give seven cases. The 901565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcurrent code allows for fixed one- or two-byte sequences, plus "any" and 901665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich"anycrlf". 
*/ 901765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 901865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichswitch (options & PCRE_NEWLINE_BITS) 901965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 902065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case 0: newline = NEWLINE; break; /* Build-time default */ 902165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PCRE_NEWLINE_CR: newline = CHAR_CR; break; 902265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PCRE_NEWLINE_LF: newline = CHAR_NL; break; 902365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PCRE_NEWLINE_CR+ 902465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break; 902565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PCRE_NEWLINE_ANY: newline = -1; break; 902665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich case PCRE_NEWLINE_ANYCRLF: newline = -2; break; 902765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN; 902865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 902965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 903065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (newline == -2) 903165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 903265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->nltype = NLTYPE_ANYCRLF; 903365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 903465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse if (newline < 0) 903565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 903665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->nltype = NLTYPE_ANY; 903765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 903865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichelse 903965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 904065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->nltype = NLTYPE_FIXED; 
904165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (newline > 255) 904265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 904365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->nllen = 2; 904465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->nl[0] = (newline >> 8) & 255; 904565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->nl[1] = newline & 255; 904665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 904765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 904865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 904965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->nllen = 1; 905065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->nl[0] = newline; 905165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 905265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 905365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 905465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Maximum back reference and backref bitmap. The bitmap records up to 31 back 905565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreferences to help in deciding whether (.*) can be treated as anchored or not. 
905665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 905765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 905865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->top_backref = 0; 905965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->backref_map = 0; 906065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 906165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Reflect pattern for debugging output */ 906265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 906365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichDPRINTF(("------------------------------------------------------------------\n")); 906465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef PCRE_DEBUG 906565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichprint_puchar(stdout, (PCRE_PUCHAR)pattern); 906665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 906765de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichDPRINTF(("\n")); 906865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 906965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Pretend to compile the pattern while actually just accumulating the length 907065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichof memory required. This behaviour is triggered by passing a non-NULL final 907165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichargument to compile_regex(). We pass a block of workspace (cworkspace) for it 907265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichto compile parts of the pattern into; the compiled code is discarded when it is 907365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichno longer needed, so hopefully this workspace will never overflow, though there 907465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichis a test for its doing so. 
*/ 907565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 907665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->bracount = cd->final_bracount = 0; 907765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->names_found = 0; 907865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->name_entry_size = 0; 907965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->name_table = NULL; 908065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->dupnames = FALSE; 908165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->namedrefcount = 0; 908265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->start_code = cworkspace; 908365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->hwm = cworkspace; 908465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->start_workspace = cworkspace; 908565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->workspace_size = COMPILE_WORK_SIZE; 908665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->named_groups = named_groups; 908765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->named_group_list_size = NAMED_GROUP_LIST_SIZE; 908865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->start_pattern = (const pcre_uchar *)pattern; 908965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern)); 909065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->req_varyopt = 0; 909165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->parens_depth = 0; 909265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->assert_depth = 0; 909365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->max_lookbehind = 0; 909465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->external_options = options; 909565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->open_caps = NULL; 909665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 909765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Now do the 
pre-compile. On error, errorcode will be set non-zero, so we 909865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdon't need to look at the result of the function here. The initial options have 909965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbeen put into the cd block so that they can be changed if an option setting is 910065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfound within the regex right at the beginning. Bringing initial option settings 910165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichoutside can help speed up starting point checks. */ 910265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 910365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichptr += skipatstart; 910465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcode = cworkspace; 910565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*code = OP_BRA; 910665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 910765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich(void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE, 910865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich FALSE, 0, 0, &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, 910965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd, &length); 911065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN; 911165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 911265de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichDPRINTF(("end pre-compile: length=%d workspace=%d\n", length, 911365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (int)(cd->hwm - cworkspace))); 911465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 911565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (length > MAX_PATTERN_SIZE) 911665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 911765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcode = ERR20; 911865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto 
PCRE_EARLY_ERROR_RETURN; 911965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 912065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 912165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If there are groups with duplicate names and there are also references by 912265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichname, we must allow for the possibility of named references to duplicated 912365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichgroups. These require an extra data item each. */ 912465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 912565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->dupnames && cd->namedrefcount > 0) 912665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich length += cd->namedrefcount * IMM2_SIZE * sizeof(pcre_uchar); 912765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 912865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Compute the size of the data block for storing the compiled pattern. Integer 912965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichoverflow should no longer be possible because nowadays we limit the maximum 913065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvalue of cd->names_found and cd->name_entry_size. */ 913165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 913265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsize = sizeof(REAL_PCRE) + 913365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar); 913465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 913565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Get the memory. 
*/ 913665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 913765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre = (REAL_PCRE *)(PUBL(malloc))(size); 913865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (re == NULL) 913965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 914065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcode = ERR21; 914165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich goto PCRE_EARLY_ERROR_RETURN; 914265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 914365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 914465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Put in the magic number, and save the sizes, initial options, internal 914565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichflags, and character table pointer. NULL is used for the default character 914665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichtables. The nullpad field is at the end; it's there to help in the case when a 914765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichregex compiled on a system with 4-byte pointers is run on another with 8-byte 914865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpointers. 
*/ 914965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 915065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->magic_number = MAGIC_NUMBER; 915165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->size = (int)size; 915265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->options = cd->external_options; 915365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->flags = cd->external_flags; 915465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->limit_match = limit_match; 915565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->limit_recursion = limit_recursion; 915665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->first_char = 0; 915765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->req_char = 0; 915865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar); 915965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->name_entry_size = cd->name_entry_size; 916065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->name_count = cd->names_found; 916165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->ref_count = 0; 916265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->tables = (tables == PRIV(default_tables))? 
NULL : tables; 916365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->nullpad = NULL; 916465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef COMPILE_PCRE32 916565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->dummy = 0; 916665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#else 916765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->dummy1 = re->dummy2 = re->dummy3 = 0; 916865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 916965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 917065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* The starting points of the name/number translation table and of the code are 917165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpassed around in the compile data block. The start/end pattern and initial 917265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichoptions are already set from the pre-compile phase, as is the name_entry_size 917365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfield. Reset the bracket count and the names_found field. Also reset the hwm 917465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfield; this time it's used for remembering forward references to subpatterns. 
917565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*/ 917665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 917765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->final_bracount = cd->bracount; /* Save for checking forward references */ 917865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->parens_depth = 0; 917965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->assert_depth = 0; 918065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->bracount = 0; 918165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->max_lookbehind = 0; 918265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->name_table = (pcre_uchar *)re + re->name_table_offset; 918365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcodestart = cd->name_table + re->name_entry_size * re->name_count; 918465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->start_code = codestart; 918565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->hwm = (pcre_uchar *)(cd->start_workspace); 918665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->req_varyopt = 0; 918765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->had_accept = FALSE; 918865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->had_pruneorskip = FALSE; 918965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->check_lookbehind = FALSE; 919065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->open_caps = NULL; 919165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 919265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If any named groups were found, create the name/number table from the list 919365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcreated in the first pass. 
*/ 919465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 919565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->names_found > 0) 919665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 919765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int i = cd->names_found; 919865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich named_group *ng = cd->named_groups; 919965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->names_found = 0; 920065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (; i > 0; i--, ng++) 920165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich add_name(cd, ng->name, ng->length, ng->number); 920265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cd->named_group_list_size > NAMED_GROUP_LIST_SIZE) 920365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (PUBL(free))((void *)cd->named_groups); 920465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 920565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 920665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Set up a starting, non-extracting bracket, then compile the expression. On 920765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevicherror, errorcode will be set non-zero, so we don't need to look at the result 920865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichof the function here. 
*/ 920965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 921065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichptr = (const pcre_uchar *)pattern + skipatstart; 921165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcode = (pcre_uchar *)codestart; 921265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*code = OP_BRA; 921365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich(void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0, 921465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich &firstchar, &firstcharflags, &reqchar, &reqcharflags, NULL, cd, NULL); 921565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->top_bracket = cd->bracount; 921665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->top_backref = cd->top_backref; 921765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->max_lookbehind = cd->max_lookbehind; 921865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichre->flags = cd->external_flags | PCRE_MODE; 921965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 922065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->had_accept) 922165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 922265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqchar = 0; /* Must disable after (*ACCEPT) */ 922365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich reqcharflags = REQ_NONE; 922465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 922565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 922665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If not reached end of pattern on success, there's an excess bracket. 
*/ 922765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 922865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (errorcode == 0 && *ptr != CHAR_NULL) errorcode = ERR22; 922965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 923065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Fill in the terminating state and check for disastrous overflow, but 923165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif debugging, leave the test till after things are printed out. */ 923265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 923365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich*code++ = OP_END; 923465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 923565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifndef PCRE_DEBUG 923665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (code - codestart > length) errorcode = ERR23; 923765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 923865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 923965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef SUPPORT_VALGRIND 924065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If the estimated length exceeds the really used length, mark the extra 924165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichallocated memory as unaddressable, so that any out-of-bound reads can be 924265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdetected. */ 924365de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichVALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar)); 924465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 924565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 924665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Fill in any forward references that are required. There may be repeated 924765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreferences; optimize for them, as searching a large regex takes time. 
*/ 924865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 924965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->hwm > cd->start_workspace) 925065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 925165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int prev_recno = -1; 925265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const pcre_uchar *groupptr = NULL; 925365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich while (errorcode == 0 && cd->hwm > cd->start_workspace) 925465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 925565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int offset, recno; 925665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cd->hwm -= LINK_SIZE; 925765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich offset = GET(cd->hwm, 0); 925865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich recno = GET(codestart, offset); 925965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (recno != prev_recno) 926065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 926165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich groupptr = PRIV(find_bracket)(codestart, utf, recno); 926265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich prev_recno = recno; 926365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 926465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (groupptr == NULL) errorcode = ERR53; 926565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart)); 926665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 926765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 926865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 926965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If the workspace had to be expanded, free the new memory. Set the pointer to 927065de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichNULL to indicate that forward references have been filled in. 
*/ 927165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 927265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->workspace_size > COMPILE_WORK_SIZE) 927365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (PUBL(free))((void *)cd->start_workspace); 927465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcd->start_workspace = NULL; 927565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 927665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Give an error if there's back reference to a non-existent capturing 927765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichsubpattern. */ 927865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 927965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15; 928065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 928165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Unless disabled, check whether any single character iterators can be 928265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichauto-possessified. The function overwrites the appropriate opcode values, so 928365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe type of the pointer must be cast. NOTE: the intermediate variable "temp" is 928465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichused in this code because at least one compiler gives a warning about loss of 928565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich"const" attribute if the cast (pcre_uchar *)codestart is used directly in the 928665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichfunction call. 
*/ 928765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 928865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((options & PCRE_NO_AUTO_POSSESS) == 0) 928965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 929065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *temp = (pcre_uchar *)codestart; 929165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich auto_possessify(temp, utf, cd); 929265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 929365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 929465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If there were any lookbehind assertions that contained OP_RECURSE 929565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich(recursions or subroutine calls), a flag is set for them to be checked here, 929665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbecause they may contain forward references. Actual recursions cannot be fixed 929765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlength, but subroutine calls can. It is done like this so that those without 929865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichOP_RECURSE that are not fixed length get a diagnostic with a useful offset. The 929965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichexceptional ones forgo this. We scan the pattern to check that they are fixed 930065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichlength, and set their lengths. 
*/ 930165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 930265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (cd->check_lookbehind) 930365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 930465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *cc = (pcre_uchar *)codestart; 930565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 930665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* Loop, searching for OP_REVERSE items, and process those that do not have 930765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich their length set. (Actually, it will also re-process any that have a length 930865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich of zero, but that is a pathological case, and it does no harm.) When we find 930965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich one, we temporarily terminate the branch it is in while we scan it. */ 931065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 931165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich for (cc = (pcre_uchar *)PRIV(find_bracket)(codestart, utf, -1); 931265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc != NULL; 931365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc = (pcre_uchar *)PRIV(find_bracket)(cc, utf, -1)) 931465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 931565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (GET(cc, 1) == 0) 931665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 931765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int fixed_length; 931865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE); 931965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich int end_op = *be; 932065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *be = OP_END; 932165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE, 932265de34233da93a3d65c00b8aad3ff9aad44c57deNick 
Kralevich cd); 932365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *be = end_op; 932465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich DPRINTF(("fixed length = %d\n", fixed_length)); 932565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (fixed_length < 0) 932665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 932765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich errorcode = (fixed_length == -2)? ERR36 : 932865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (fixed_length == -4)? ERR70 : ERR25; 932965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 933065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 933165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (fixed_length > cd->max_lookbehind) cd->max_lookbehind = fixed_length; 933265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PUT(cc, 1, fixed_length); 933365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 933465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich cc += 1 + LINK_SIZE; 933565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 933665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 933765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 933865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Failed to compile, or error while post-processing */ 933965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 934065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (errorcode != 0) 934165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 934265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (PUBL(free))(re); 934365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PCRE_EARLY_ERROR_RETURN: 934465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *erroroffset = (int)(ptr - (const pcre_uchar *)pattern); 934565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich PCRE_EARLY_ERROR_RETURN2: 934665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorptr = find_error_text(errorcode); 
934765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (errorcodeptr != NULL) *errorcodeptr = errorcode; 934865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return NULL; 934965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 935065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 935165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* If the anchored option was not passed, set the flag if we can determine that 935265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichthe pattern is anchored by virtue of ^ characters or \A or anything else, such 935365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichas starting with non-atomic .* when DOTALL is set and there are no occurrences 935465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichof *PRUNE or *SKIP. 935565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 935665de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichOtherwise, if we know what the first byte has to be, save it, because that 935765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichspeeds up unanchored matches no end. If not, see if we can set the 935865de34233da93a3d65c00b8aad3ff9aad44c57deNick KralevichPCRE_STARTLINE flag. This is helpful for multiline matches when all branches 935965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichstart with ^, and also when all branches start with non-atomic .* for 936065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichnon-DOTALL matches when *PRUNE and *SKIP are not present. 
*/ 936165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 936265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((re->options & PCRE_ANCHORED) == 0) 936365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 936465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (is_anchored(codestart, 0, cd, 0)) re->options |= PCRE_ANCHORED; 936565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 936665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 936765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags < 0) 936865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich firstchar = find_firstassertedchar(codestart, &firstcharflags, FALSE); 936965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (firstcharflags >= 0) /* Remove caseless flag for non-caseable chars */ 937065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 937165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8 937265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->first_char = firstchar & 0xff; 937365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16 937465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->first_char = firstchar & 0xffff; 937565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32 937665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->first_char = firstchar; 937765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 937865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((firstcharflags & REQ_CASELESS) != 0) 937965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 938065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) 938165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* We ignore non-ASCII first chars in 8 bit mode. 
*/ 938265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf) 938365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 938465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (re->first_char < 128) 938565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 938665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cd->fcc[re->first_char] != re->first_char) 938765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->flags |= PCRE_FCH_CASELESS; 938865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 938965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (UCD_OTHERCASE(re->first_char) != re->first_char) 939065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->flags |= PCRE_FCH_CASELESS; 939165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 939265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 939365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 939465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (MAX_255(re->first_char) 939565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich && cd->fcc[re->first_char] != re->first_char) 939665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->flags |= PCRE_FCH_CASELESS; 939765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 939865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 939965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->flags |= PCRE_FIRSTSET; 940065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 940165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 940265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (is_startline(codestart, 0, cd, 0)) re->flags |= PCRE_STARTLINE; 940365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 940465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 940565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 940665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* For an anchored pattern, we use the "required byte" only if it follows a 
940765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichvariable length item in the regex. Remove the caseless flag for non-caseable 940865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichbytes. */ 940965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 941065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (reqcharflags >= 0 && 941165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ((re->options & PCRE_ANCHORED) == 0 || (reqcharflags & REQ_VARY) != 0)) 941265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 941365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8 941465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->req_char = reqchar & 0xff; 941565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16 941665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->req_char = reqchar & 0xffff; 941765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32 941865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->req_char = reqchar; 941965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 942065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if ((reqcharflags & REQ_CASELESS) != 0) 942165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 942265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) 942365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich /* We ignore non-ASCII first chars in 8 bit mode. 
*/ 942465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (utf) 942565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 942665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (re->req_char < 128) 942765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 942865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (cd->fcc[re->req_char] != re->req_char) 942965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->flags |= PCRE_RCH_CASELESS; 943065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 943165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else if (UCD_OTHERCASE(re->req_char) != re->req_char) 943265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->flags |= PCRE_RCH_CASELESS; 943365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 943465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else 943565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 943665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (MAX_255(re->req_char) && cd->fcc[re->req_char] != re->req_char) 943765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->flags |= PCRE_RCH_CASELESS; 943865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 943965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 944065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->flags |= PCRE_REQCHSET; 944165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 944265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 944365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Print out the compiled data if debugging is enabled. This is never the 944465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcase when building a production library. 
*/ 944565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 944665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#ifdef PCRE_DEBUG 944765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichprintf("Length = %d top_bracket = %d top_backref = %d\n", 944865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich length, re->top_bracket, re->top_backref); 944965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 945065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichprintf("Options=%08x\n", re->options); 945165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 945265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((re->flags & PCRE_FIRSTSET) != 0) 945365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 945465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar ch = re->first_char; 945565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const char *caseless = 945665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ((re->flags & PCRE_FCH_CASELESS) == 0)? "" : " (caseless)"; 945765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (PRINTABLE(ch)) printf("First char = %c%s\n", ch, caseless); 945865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else printf("First char = \\x%02x%s\n", ch, caseless); 945965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 946065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 946165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif ((re->flags & PCRE_REQCHSET) != 0) 946265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 946365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich pcre_uchar ch = re->req_char; 946465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich const char *caseless = 946565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich ((re->flags & PCRE_RCH_CASELESS) == 0)? 
"" : " (caseless)"; 946665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (PRINTABLE(ch)) printf("Req char = %c%s\n", ch, caseless); 946765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich else printf("Req char = \\x%02x%s\n", ch, caseless); 946865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 946965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 947065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8 947165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre_printint((pcre *)re, stdout, TRUE); 947265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16 947365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre16_printint((pcre *)re, stdout, TRUE); 947465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32 947565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichpcre32_printint((pcre *)re, stdout, TRUE); 947665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 947765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 947865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* This check is done here in the debugging case so that the code that 947965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwas compiled can be seen. 
*/ 948065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 948165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichif (code - codestart > length) 948265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 948365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich (PUBL(free))(re); 948465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *errorptr = find_error_text(ERR23); 948565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich *erroroffset = ptr - (pcre_uchar *)pattern; 948665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (errorcodeptr != NULL) *errorcodeptr = ERR23; 948765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich return NULL; 948865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 948965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif /* PCRE_DEBUG */ 949065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 949165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* Check for a pattern than can match an empty string, so that this information 949265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichcan be provided to applications. 
*/ 949365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 949465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichdo 949565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 949665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich if (could_be_empty_branch(codestart, code, utf, cd, NULL)) 949765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich { 949865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich re->flags |= PCRE_MATCH_EMPTY; 949965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich break; 950065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 950165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich codestart += GET(codestart, 1); 950265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich } 950365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichwhile (*codestart == OP_ALT); 950465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 950565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#if defined COMPILE_PCRE8 950665de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn (pcre *)re; 950765de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE16 950865de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn (pcre16 *)re; 950965de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#elif defined COMPILE_PCRE32 951065de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevichreturn (pcre32 *)re; 951165de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich#endif 951265de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich} 951365de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 951465de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich/* End of pcre_compile.c */ 951565de34233da93a3d65c00b8aad3ff9aad44c57deNick Kralevich 9516