1f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 2f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich* Perl-Compatible Regular Expressions * 3f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 4f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* PCRE is a library of functions to support regular expressions whose syntax 6f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichand semantics are as close as possible to those of the Perl 5 language. 7f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich Written by Philip Hazel 953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis Original API code Copyright (c) 1997-2012 University of Cambridge 1053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis New API code Copyright (c) 2016 University of Cambridge 11f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 12f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich----------------------------------------------------------------------------- 13f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichRedistribution and use in source and binary forms, with or without 14f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichmodification, are permitted provided that the following conditions are met: 15f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 16f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich * Redistributions of source code must retain the above copyright notice, 17f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich this list of conditions and the following disclaimer. 
18f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 19f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich * Redistributions in binary form must reproduce the above copyright 20f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich notice, this list of conditions and the following disclaimer in the 21f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich documentation and/or other materials provided with the distribution. 22f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 23f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich * Neither the name of the University of Cambridge nor the names of its 24f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich contributors may be used to endorse or promote products derived from 25f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich this software without specific prior written permission. 26f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 27f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichPOSSIBILITY OF SUCH DAMAGE. 38f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich----------------------------------------------------------------------------- 39f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 40f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 41f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 42f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#ifdef HAVE_CONFIG_H 43f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#include "config.h" 44f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 45f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define NLBLOCK cb /* Block containing newline information */ 4753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define PSSTART start_pattern /* Field containing processed string start */ 4853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define PSEND end_pattern /* Field containing processed string end */ 49f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#include 
"pcre2_internal.h" 51f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* In rare error cases debugging might require calling pcre2_printint(). */ 53f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if 0 5553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef EBCDIC 5653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define PRINTABLE(c) ((c) >= 64 && (c) < 255) 5753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else 5853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define PRINTABLE(c) ((c) >= 32 && (c) < 127) 5953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 6053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#include "pcre2_printint.c" 6153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define CALL_PRINTINT 62f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 63f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* There are a few things that vary with different code unit sizes. Handle them 6553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisby defining macros in order to minimize #if usage. */ 66f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 8 6853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define STRING_UTFn_RIGHTPAR STRING_UTF8_RIGHTPAR, 5 6953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define XDIGIT(c) xdigitab[c] 70f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else /* Either 16-bit or 32-bit */ 7253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define XDIGIT(c) (MAX_255(c)? 
xdigitab[c] : 0xff) 73f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 16 7553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define STRING_UTFn_RIGHTPAR STRING_UTF16_RIGHTPAR, 6 76f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else /* 32-bit */ 7853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define STRING_UTFn_RIGHTPAR STRING_UTF32_RIGHTPAR, 6 7953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 8053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 81f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Function definitions to allow mutual recursion */ 83f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 848b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisstatic unsigned int 8553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis add_list_to_class(uint8_t *, PCRE2_UCHAR **, uint32_t, compile_block *, 8653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis const uint32_t *, unsigned int); 87f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 88f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic BOOL 8953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis compile_regex(uint32_t, PCRE2_UCHAR **, PCRE2_SPTR *, int *, BOOL, BOOL, 9053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t, int, uint32_t *, int32_t *, uint32_t *, int32_t *, 9153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis branch_chain *, compile_block *, size_t *); 92f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 93f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 94f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 95f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 96f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich* Code parameters and static tables * 97f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 98f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 9953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This value specifies the size of stack workspace, which is used in different 10053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisways in the different pattern scans. The group-identifying pre-scan uses it to 10153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskishandle nesting, and needs it to be 16-bit aligned. 102f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 10353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisDuring the first compiling phase, when determining how much memory is required, 10453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe regex is partly compiled into this space, but the compiled parts are 10553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisdiscarded as soon as they can be, so that hopefully there will never be an 10653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisoverrun. The code does, however, check for an overrun, which can occur for 10753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispathological patterns. The size of the workspace depends on LINK_SIZE because 10853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe length of compiled items varies with this. 109f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 11053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisIn the real compile phase, the workspace is used for remembering data about 11153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnumbered groups, provided there are not too many of them (if there are, extra 11253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismemory is acquired). For this phase the memory must be 32-bit aligned. 
Having 11353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisdefined the size in code units, we set up C32_WORK_SIZE as the number of 11453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiselements in the 32-bit vector. */ 11553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 11653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define COMPILE_WORK_SIZE (2048*LINK_SIZE) /* Size in code units */ 11753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 11853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define C32_WORK_SIZE \ 11953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ((COMPILE_WORK_SIZE * sizeof(PCRE2_UCHAR))/sizeof(uint32_t)) 120f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 121f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* The overrun tests check for a slightly smaller size so that they detect the 122f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichoverrun before it actually does run off the end of the data block. */ 123f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 124f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#define WORK_SIZE_SAFETY_MARGIN (100) 125f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 12653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This value determines the size of the initial vector that is used for 12753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisremembering named groups during the pre-compile. It is allocated on the stack, 12853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbut if it is too small, it is expanded, in a similar way to the workspace. The 12953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisvalue is the number of slots in the list. 
*/ 13053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 13153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define NAMED_GROUP_LIST_SIZE 20 13253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 13353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* The original PCRE required patterns to be zero-terminated, and it simplifies 13453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe compiling code if it is guaranteed that there is a zero code unit at the 13553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisend of the pattern, because this means that tests for coding sequences such as 13653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis(*SKIP) or even just (?<= can check a sequence of code units without having to 13753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiskeep checking for the end of the pattern. The new PCRE2 API allows zero code 13853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisunits within patterns if a positive length is given, but in order to keep most 13953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisof the compiling code as it was, we copy such patterns and add a zero on the 14053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisend. This value determines the size of space on the stack that is used if the 14153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispattern fits; if not, heap memory is used. */ 14253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 14353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define COPIED_PATTERN_SIZE 1024 14453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 14553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Maximum length value to check against when making sure that the variable 14653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthat holds the compiled pattern length does not overflow. 
We make it a bit less 14753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthan INT_MAX to allow for adding in group terminating bytes, so that we don't 14853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskishave to check them every time. */ 14953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 15053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define OFLOW_MAX (INT_MAX - 20) 15153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 1528b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis/* Macro for setting individual bits in class bitmaps. It took some 1538b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisexperimenting to figure out how to stop gcc 5.3.0 from warning with 1548b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis-Wconversion. This version gets a warning: 1558b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 1568b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis #define SETBIT(a,b) a[(b)/8] |= (uint8_t)(1 << ((b)&7)) 1578b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 1588b979b2abae173bb836d8e85a842cfd00447d4beJanis DanisevskisLet's hope the apparently less efficient version isn't actually so bad if the 1598b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskiscompiler is clever with identical subexpressions. */ 16053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 1618b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis#define SETBIT(a,b) a[(b)/8] = (uint8_t)(a[(b)/8] | (1 << ((b)&7))) 16253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 16353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Private flags added to firstcu and reqcu. 
*/ 164f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 165f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#define REQ_CASELESS (1 << 0) /* Indicates caselessness */ 16653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define REQ_VARY (1 << 1) /* reqcu followed non-literal item */ 16753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Negative values for the firstcu and reqcu flags */ 16853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define REQ_UNSET (-2) /* Not yet found anything */ 16953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define REQ_NONE (-1) /* Found not fixed char */ 17053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 17153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* These flags are used in the groupinfo vector. */ 17253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 17353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define GI_SET_COULD_BE_EMPTY 0x80000000u 17453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define GI_COULD_BE_EMPTY 0x40000000u 17553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define GI_NOT_FIXED_LENGTH 0x20000000u 17653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define GI_SET_FIXED_LENGTH 0x10000000u 17753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define GI_FIXED_LENGTH_MASK 0x0000ffffu 17853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 17953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This bit (which is greater than any UTF value) is used to indicate that a 18053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisvariable contains a number of code units instead of an actual code point. 
*/ 181f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 18253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define UTF_LENGTH 0x10000000l 183f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 18453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This simple test for a decimal digit works for both ASCII/Unicode and EBCDIC 18553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisand is fast (a good compiler can turn it into a subtraction and unsigned 18653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscomparison). */ 187f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 18853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9) 18953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 19053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Table to identify hex digits. The tables in chartables are dependent on the 19153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskislocale, and may mark arbitrary characters as digits. We want to recognize only 19253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis0-9, a-z, and A-Z as hex digits, which is why we have a private table here. It 19353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscosts 256 bytes, but it is a lot faster than doing character value tests (at 19453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisleast in some simple cases I timed), and in some applications one wants PCRE to 19553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompile efficiently as well as match efficiently. The value in the table is 19653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe binary hex digit value, or 0xff for non-hex digits. 
*/ 19753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 19853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in 19953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisUTF-8 mode. */ 200f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 201f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#ifndef EBCDIC 20253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const uint8_t xdigitab[] = 20353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 20453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 0- 7 */ 20553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 8- 15 */ 20653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 16- 23 */ 20753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 24- 31 */ 20853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* - ' */ 20953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* ( - / */ 21053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, /* 0 - 7 */ 21153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0x08,0x09,0xff,0xff,0xff,0xff,0xff,0xff, /* 8 - ? 
*/ 21253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff, /* @ - G */ 21353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* H - O */ 21453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* P - W */ 21553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* X - _ */ 21653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff, /* ` - g */ 21753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* h - o */ 21853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* p - w */ 21953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* x -127 */ 22053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 128-135 */ 22153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 136-143 */ 22253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 144-151 */ 22353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 152-159 */ 22453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 160-167 */ 22553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 168-175 */ 22653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 176-183 */ 22753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 184-191 */ 22853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 192-199 */ 
22953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 2ff-207 */ 23053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 208-215 */ 23153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 216-223 */ 23253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 224-231 */ 23353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 232-239 */ 23453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 240-247 */ 23553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff};/* 248-255 */ 23653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 23753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else 23853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 23953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. 
*/ 24053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 24153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const uint8_t xdigitab[] = 24253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 24353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 0- 7 0 */ 24453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 8- 15 */ 24553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 16- 23 10 */ 24653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 24- 31 */ 24753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 32- 39 20 */ 24853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 40- 47 */ 24953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 48- 55 30 */ 25053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 56- 63 */ 25153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* - 71 40 */ 25253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 72- | */ 25353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* & - 87 50 */ 25453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 88- 95 */ 25553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* - -103 60 */ 25653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 104- ? 
*/ 25753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 112-119 70 */ 25853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 120- " */ 25953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff, /* 128- g 80 */ 26053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* h -143 */ 26153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 144- p 90 */ 26253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* q -159 */ 26353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 160- x A0 */ 26453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* y -175 */ 26553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* ^ -183 B0 */ 26653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 184-191 */ 26753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff, /* { - G C0 */ 26853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* H -207 */ 26953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* } - P D0 */ 27053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* Q -223 */ 27153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* \ - X E0 */ 27253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* Y -239 */ 27353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, /* 0 - 7 F0 */ 
27453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0x08,0x09,0xff,0xff,0xff,0xff,0xff,0xff};/* 8 -255 */ 27553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* EBCDIC */ 27653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 27753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 27853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Table for handling alphanumeric escaped characters. Positive returns are 27953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissimple data values; negative values are for special things like \d and so on. 28053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisZero means further processing is needed (for things like \x), or the escape is 28153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisinvalid. */ 282f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 283f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* This is the "normal" table for ASCII systems or for EBCDIC systems running 28453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisin UTF-8 mode. It runs from '0' to 'z'. 
*/ 28553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 28653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifndef EBCDIC 28753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define ESCAPES_FIRST CHAR_0 28853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define ESCAPES_LAST CHAR_z 28953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define UPPER_CASE(c) (c-32) 290f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 291f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic const short int escapes[] = { 292f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 293f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 294f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 295f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 296f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 297f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_COLON, CHAR_SEMICOLON, 298f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, 299f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_GREATER_THAN_SIGN, CHAR_QUESTION_MARK, 300f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_COMMERCIAL_AT, -ESC_A, 301f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -ESC_B, -ESC_C, 302f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -ESC_D, -ESC_E, 303f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, -ESC_G, 304f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -ESC_H, 0, 305f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, -ESC_K, 306f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 307f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -ESC_N, 0, 308f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -ESC_P, -ESC_Q, 309f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -ESC_R, -ESC_S, 310f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 311f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich -ESC_V, -ESC_W, 312f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -ESC_X, 0, 313f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -ESC_Z, CHAR_LEFT_SQUARE_BRACKET, 314f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_RIGHT_SQUARE_BRACKET, 315f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_CIRCUMFLEX_ACCENT, CHAR_UNDERSCORE, 3160ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes CHAR_GRAVE_ACCENT, ESC_a, 317f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -ESC_b, 0, 318f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -ESC_d, ESC_e, 319f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ESC_f, 0, 320f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -ESC_h, 0, 321f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, -ESC_k, 322f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 323f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ESC_n, 0, 324f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -ESC_p, 0, 325f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ESC_r, -ESC_s, 326f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ESC_tee, 0, 327f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -ESC_v, -ESC_w, 328f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 329f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -ESC_z 330f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}; 331f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 332f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#else 333f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 33453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This is the "abnormal" table for EBCDIC systems without UTF-8 support. 33553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisIt runs from 'a' to '9'. For some minimal testing of EBCDIC features, the code 33653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisis sometimes compiled on an ASCII system. 
In this case, we must not use CHAR_a 33753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbecause it is defined as 'a', which of course picks up the ASCII value. */ 33853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 33953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if 'a' == 0x81 /* Check for a real EBCDIC environment */ 34053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define ESCAPES_FIRST CHAR_a 34153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define ESCAPES_LAST CHAR_9 34253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define UPPER_CASE(c) (c+64) 34353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else /* Testing in an ASCII environment */ 34453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define ESCAPES_FIRST ((unsigned char)'\x81') /* EBCDIC 'a' */ 34553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define ESCAPES_LAST ((unsigned char)'\xf9') /* EBCDIC '9' */ 34653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define UPPER_CASE(c) (c-32) 34753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 348f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 349f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic const short int escapes[] = { 35053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* 80 */ ESC_a, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, 351f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* 88 */-ESC_h, 0, 0, '{', 0, 0, 0, 0, 3520ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes/* 90 */ 0, 0, -ESC_k, 0, 0, ESC_n, 0, -ESC_p, 353f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0, 354f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* A0 */ 0, '~', -ESC_s, ESC_tee, 0,-ESC_v, -ESC_w, 0, 355f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0, 356f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 
357f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-', 358f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G, 359f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* C8 */-ESC_H, 0, 0, 0, 0, 0, 0, 0, 360f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* D0 */ '}', 0, -ESC_K, 0, 0,-ESC_N, 0, -ESC_P, 361f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* D8 */-ESC_Q,-ESC_R, 0, 0, 0, 0, 0, 0, 362f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* E0 */ '\\', 0, -ESC_S, 0, 0,-ESC_V, -ESC_W, -ESC_X, 363f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0, 364f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, 36553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* F8 */ 0, 0 366f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}; 3670ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 3680ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes/* We also need a table of characters that may follow \c in an EBCDIC 3690ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughesenvironment for characters 0-31. */ 3700ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 3710ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughesstatic unsigned char ebcdic_escape_c[] = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"; 3720ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 37353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* EBCDIC */ 374f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 375f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 376f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Table of special "verbs" like (*PRUNE). This is a short table, so it is 377f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichsearched linearly. 
Put all the names into a single string, in order to reduce 378f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichthe number of relocations when a shared library is dynamically linked. The 379f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstring is built from string macros so that it works in UTF-8 mode on EBCDIC 380f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichplatforms. */ 381f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 382f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichtypedef struct verbitem { 383f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int len; /* Length of verb name */ 384f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int op; /* Op when no arg, or -1 if arg mandatory */ 385f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int op_arg; /* Op when arg present, or -1 if not allowed */ 386f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} verbitem; 387f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 388f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic const char verbnames[] = 389f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich "\0" /* Empty name is a shorthand for MARK */ 390f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich STRING_MARK0 391f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich STRING_ACCEPT0 392f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich STRING_COMMIT0 393f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich STRING_F0 394f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich STRING_FAIL0 395f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich STRING_PRUNE0 396f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich STRING_SKIP0 397f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich STRING_THEN; 398f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 399f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic const verbitem verbs[] = { 400f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 0, -1, OP_MARK }, 
401f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4, -1, OP_MARK }, 402f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6, OP_ACCEPT, -1 }, 403f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6, OP_COMMIT, -1 }, 404f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 1, OP_FAIL, -1 }, 405f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4, OP_FAIL, -1 }, 406f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5, OP_PRUNE, OP_PRUNE_ARG }, 407f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4, OP_SKIP, OP_SKIP_ARG }, 408f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4, OP_THEN, OP_THEN_ARG } 409f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}; 410f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 411f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic const int verbcount = sizeof(verbs)/sizeof(verbitem); 412f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 413f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 414f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Substitutes for [[:<:]] and [[:>:]], which mean start and end of word in 415f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichanother regex library. 
*/ 416f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 41753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR sub_start_of_word[] = { 418f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK, 419f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w, CHAR_RIGHT_PARENTHESIS, '\0' }; 420f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 42153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR sub_end_of_word[] = { 422f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK, 423f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w, 424f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_RIGHT_PARENTHESIS, '\0' }; 425f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 426f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 427f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Tables of names of POSIX character classes and their lengths. The names are 428f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichnow all in a single string, to reduce the number of relocations when a shared 429f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichlibrary is dynamically loaded. The list of lengths is terminated by a zero 430f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichlength entry. The first three must be alpha, lower, upper, as this is assumed 431f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichfor handling case independence. The indices for graph, print, and punct are 432f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichneeded, so identify them. 
*/ 433f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 434f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic const char posix_names[] = 435f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0 436f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0 437f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich STRING_graph0 STRING_print0 STRING_punct0 STRING_space0 438f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich STRING_word0 STRING_xdigit; 439f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 44053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const uint8_t posix_name_lengths[] = { 441f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; 442f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 443f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#define PC_GRAPH 8 444f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#define PC_PRINT 9 445f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#define PC_PUNCT 10 446f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 447f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 448f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Table of class bit maps for each POSIX class. Each class is formed from a 449f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbase map, with an optional addition or removal of another map. Then, for some 450f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichclasses, there is some additional tweaking: for [:blank:] the vertical space 451f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichcharacters are removed, and for [:alpha:] and [:alnum:] the underscore 452f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichcharacter is removed. 
The triples in the table consist of the base map offset, 453f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichsecond map offset or -1 if no second map, and a non-negative value for map 454f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichaddition or a negative value for map subtraction (if there are two maps). The 455f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichabsolute value of the third field has these meanings: 0 => no tweaking, 1 => 456f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichremove vertical space characters, 2 => remove underscore. */ 457f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 458f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic const int posix_class_maps[] = { 459f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cbit_word, cbit_digit, -2, /* alpha */ 460f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cbit_lower, -1, 0, /* lower */ 461f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cbit_upper, -1, 0, /* upper */ 462f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cbit_word, -1, 2, /* alnum - word without underscore */ 463f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cbit_print, cbit_cntrl, 0, /* ascii */ 464f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cbit_space, -1, 1, /* blank - a GNU extension */ 465f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cbit_cntrl, -1, 0, /* cntrl */ 466f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cbit_digit, -1, 0, /* digit */ 467f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cbit_graph, -1, 0, /* graph */ 468f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cbit_print, -1, 0, /* print */ 469f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cbit_punct, -1, 0, /* punct */ 470f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cbit_space, -1, 0, /* space */ 471f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cbit_word, -1, 0, /* word - a Perl extension */ 
472f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cbit_xdigit,-1, 0 /* xdigit */ 473f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}; 474f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 47553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Table of substitutes for \d etc when PCRE2_UCP is set. They are replaced by 476f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichUnicode property escapes. */ 477f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 47853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 47953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PNd[] = { 480f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 481f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 48253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pNd[] = { 483f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 484f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 48553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PXsp[] = { 486f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 487f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 48853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pXsp[] = { 489f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 490f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 49153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PXwd[] = { 
492f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 493f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 49453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pXwd[] = { 495f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 496f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 497f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 49853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic PCRE2_SPTR substitutes[] = { 499f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_PNd, /* \D */ 500f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_pNd, /* \d */ 501f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_PXsp, /* \S */ /* Xsp is Perl space, but from 8.34, Perl */ 502f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_pXsp, /* \s */ /* space and POSIX space are the same. */ 503f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_PXwd, /* \W */ 504f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_pXwd /* \w */ 505f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}; 506f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 507f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* The POSIX class substitutes must be in the order of the POSIX class names, 508f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdefined above, and there are both positive and negative cases. NULL means no 509f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichgeneral substitute of a Unicode property escape (\p or \P). However, for some 510f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichPOSIX classes (e.g. graph, print, punct) a special property code is compiled 511f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdirectly. 
*/ 512f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 51353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pCc[] = { 51453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 51553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis CHAR_C, CHAR_c, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 51653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pL[] = { 517f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 518f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 51953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pLl[] = { 520f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 521f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 52253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pLu[] = { 523f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 524f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 52553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pXan[] = { 526f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 527f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 52853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_h[] = { 529f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_h, '\0' }; 53053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pXps[] = { 531f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, 532f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 53353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PCc[] = { 53453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 53553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis CHAR_C, CHAR_c, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 53653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PL[] = { 537f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 538f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 53953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PLl[] = { 540f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 541f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 54253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PLu[] = { 543f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 544f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 54553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PXan[] = { 546f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 547f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 54853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_H[] = { 549f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_H, '\0' }; 
55053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PXps[] = { 551f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, 552f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' }; 553f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 55453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic PCRE2_SPTR posix_substitutes[] = { 555f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_pL, /* alpha */ 556f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_pLl, /* lower */ 557f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_pLu, /* upper */ 558f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_pXan, /* alnum */ 559f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich NULL, /* ascii */ 560f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_h, /* blank */ 56153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis string_pCc, /* cntrl */ 562f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_pNd, /* digit */ 563f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich NULL, /* graph */ 564f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich NULL, /* print */ 565f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich NULL, /* punct */ 566f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_pXps, /* space */ /* Xps is POSIX space, but from 8.34 */ 567f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_pXwd, /* word */ /* Perl and POSIX space are the same */ 568f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich NULL, /* xdigit */ 569f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Negated cases */ 570f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_PL, /* ^alpha */ 571f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_PLl, /* ^lower */ 572f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_PLu, 
/* ^upper */ 573f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_PXan, /* ^alnum */ 574f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich NULL, /* ^ascii */ 575f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_H, /* ^blank */ 57653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis string_PCc, /* ^cntrl */ 577f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_PNd, /* ^digit */ 578f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich NULL, /* ^graph */ 579f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich NULL, /* ^print */ 580f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich NULL, /* ^punct */ 581f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_PXps, /* ^space */ /* Xps is POSIX space, but from 8.34 */ 582f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich string_PXwd, /* ^word */ /* Perl and POSIX space are the same */ 583f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich NULL /* ^xdigit */ 584f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}; 58553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(PCRE2_UCHAR *)) 58653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* SUPPORT_UNICODE */ 58753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 58853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Masks for checking option settings. 
*/ 58953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 59053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define PUBLIC_COMPILE_OPTIONS \ 59153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \ 59253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \ 59353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \ 59453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \ 59553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_NEVER_UCP|PCRE2_NEVER_UTF|PCRE2_NO_AUTO_CAPTURE| \ 59653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \ 59753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_NO_UTF_CHECK|PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \ 59853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UTF) 59953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 60053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Compile time error code numbers. They are given names so that they can more 60153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiseasily be tracked. When a new number is added, the tables called eint1 and 60253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiseint2 in pcre2posix.c may need to be updated, and a new error text must be 60353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisadded to compile_error_texts in pcre2_error.c. 
*/ 60453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 60553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisenum { ERR0 = COMPILE_ERROR_BASE, 60653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10, 60753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20, 60853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30, 60953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40, 61053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, 61153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, 61253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70, 61353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80, 61453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88 }; 61553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 61653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Error codes that correspond to negative error codes returned by 61753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfind_fixedlength(). 
*/ 61853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 61953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic int fixed_length_errors[] = 620f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 62153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR0, /* Not an error */ 62253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR0, /* Not an error; -1 is used for "process later" */ 62353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR25, /* Lookbehind is not fixed length */ 62453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR36, /* \C in lookbehind is not allowed */ 62553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR87, /* Lookbehind is too long */ 62653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR86, /* Pattern too complicated */ 62753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR70 /* Internal error: unknown opcode encountered */ 62853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis }; 62953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 63053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This is a table of start-of-pattern options such as (*UTF) and settings such 63153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisas (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward 63253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompatibility, (*UTFn) is supported in the relevant libraries, but (*UTF) is 63353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisgeneric and always supported. 
*/ 63453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 63553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisenum { PSO_OPT, /* Value is an option bit */ 63653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PSO_FLG, /* Value is a flag bit */ 63753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PSO_NL, /* Value is a newline type */ 63853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PSO_BSR, /* Value is a \R type */ 63953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PSO_LIMM, /* Read integer value for match limit */ 64053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PSO_LIMR }; /* Read integer value for recursion limit */ 64153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 64253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskistypedef struct pso { 64353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis const uint8_t *name; 64453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint16_t length; 64553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint16_t type; 64653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t value; 64753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis} pso; 64853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 64953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* NB: STRING_UTFn_RIGHTPAR contains the length as well */ 65053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 65153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic pso pso_list[] = { 65253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_UTFn_RIGHTPAR, PSO_OPT, PCRE2_UTF }, 65353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF }, 65453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP }, 65553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_NOTEMPTY_RIGHTPAR, 9, 
PSO_FLG, PCRE2_NOTEMPTY_SET }, 65653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR, 17, PSO_FLG, PCRE2_NE_ATST_SET }, 65753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS }, 65853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPT, PCRE2_NO_DOTSTAR_ANCHOR }, 65953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_NO_JIT_RIGHTPAR, 7, PSO_FLG, PCRE2_NOJIT }, 66053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE }, 66153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 }, 66253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_LIMIT_RECURSION_EQ, 16, PSO_LIMR, 0 }, 66353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_CR_RIGHTPAR, 3, PSO_NL, PCRE2_NEWLINE_CR }, 66453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_LF_RIGHTPAR, 3, PSO_NL, PCRE2_NEWLINE_LF }, 66553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_CRLF_RIGHTPAR, 5, PSO_NL, PCRE2_NEWLINE_CRLF }, 66653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_ANY_RIGHTPAR, 4, PSO_NL, PCRE2_NEWLINE_ANY }, 66753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_ANYCRLF_RIGHTPAR, 8, PSO_NL, PCRE2_NEWLINE_ANYCRLF }, 66853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_BSR_ANYCRLF_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_ANYCRLF }, 66953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { (uint8_t *)STRING_BSR_UNICODE_RIGHTPAR, 12, PSO_BSR, PCRE2_BSR_UNICODE } 670f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}; 671f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich 672f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* This table is used when converting repeating opcodes into possessified 673f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichversions as a result of an explicit possessive quantifier such as ++. A zero 674f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichvalue means there is no possessified version - in those cases the item in 675f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichquestion must be wrapped in ONCE brackets. The table is truncated at OP_CALLOUT 676f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbecause all relevant opcodes are less than that. */ 677f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 67853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const uint8_t opcode_possessify[] = { 679f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 15 */ 680f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16 - 31 */ 681f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 682f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, /* NOTI */ 683f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_POSSTAR, 0, /* STAR, MINSTAR */ 684f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_POSPLUS, 0, /* PLUS, MINPLUS */ 685f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_POSQUERY, 0, /* QUERY, MINQUERY */ 686f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_POSUPTO, 0, /* UPTO, MINUPTO */ 687f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, /* EXACT */ 688f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 0, 0, /* POS{STAR,PLUS,QUERY,UPTO} */ 689f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 690f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_POSSTARI, 0, /* STARI, MINSTARI */ 691f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_POSPLUSI, 0, /* PLUSI, MINPLUSI */ 
692f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_POSQUERYI, 0, /* QUERYI, MINQUERYI */ 693f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_POSUPTOI, 0, /* UPTOI, MINUPTOI */ 694f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, /* EXACTI */ 695f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 0, 0, /* POS{STARI,PLUSI,QUERYI,UPTOI} */ 696f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 697f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_NOTPOSSTAR, 0, /* NOTSTAR, NOTMINSTAR */ 698f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_NOTPOSPLUS, 0, /* NOTPLUS, NOTMINPLUS */ 699f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_NOTPOSQUERY, 0, /* NOTQUERY, NOTMINQUERY */ 700f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_NOTPOSUPTO, 0, /* NOTUPTO, NOTMINUPTO */ 701f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, /* NOTEXACT */ 702f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 0, 0, /* NOTPOS{STAR,PLUS,QUERY,UPTO} */ 703f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 704f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_NOTPOSSTARI, 0, /* NOTSTARI, NOTMINSTARI */ 705f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_NOTPOSPLUSI, 0, /* NOTPLUSI, NOTMINPLUSI */ 706f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_NOTPOSQUERYI, 0, /* NOTQUERYI, NOTMINQUERYI */ 707f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_NOTPOSUPTOI, 0, /* NOTUPTOI, NOTMINUPTOI */ 708f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, /* NOTEXACTI */ 709f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 0, 0, /* NOTPOS{STARI,PLUSI,QUERYI,UPTOI} */ 710f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 711f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_TYPEPOSSTAR, 0, /* TYPESTAR, TYPEMINSTAR */ 712f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_TYPEPOSPLUS, 0, /* TYPEPLUS, TYPEMINPLUS */ 
713f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_TYPEPOSQUERY, 0, /* TYPEQUERY, TYPEMINQUERY */ 714f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_TYPEPOSUPTO, 0, /* TYPEUPTO, TYPEMINUPTO */ 715f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, /* TYPEEXACT */ 716f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 0, 0, /* TYPEPOS{STAR,PLUS,QUERY,UPTO} */ 717f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 718f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_CRPOSSTAR, 0, /* CRSTAR, CRMINSTAR */ 719f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_CRPOSPLUS, 0, /* CRPLUS, CRMINPLUS */ 720f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_CRPOSQUERY, 0, /* CRQUERY, CRMINQUERY */ 721f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_CRPOSRANGE, 0, /* CRRANGE, CRMINRANGE */ 722f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 0, 0, /* CRPOS{STAR,PLUS,QUERY,RANGE} */ 723f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 724f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, 0, /* CLASS, NCLASS, XCLASS */ 725f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, /* REF, REFI */ 726f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0, /* DNREF, DNREFI */ 727f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 0, 0 /* RECURSE, CALLOUT */ 728f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}; 729f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 730f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 731f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 732f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 7338b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis* Copy compiled code * 7348b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis*************************************************/ 7358b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 
7368b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis/* Compiled JIT code cannot be copied, so the new compiled block has no 7378b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisassociated JIT data. */ 7388b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 7398b979b2abae173bb836d8e85a842cfd00447d4beJanis DanisevskisPCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION 7408b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskispcre2_code_copy(const pcre2_code *code) 7418b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis{ 7428b979b2abae173bb836d8e85a842cfd00447d4beJanis DanisevskisPCRE2_SIZE* ref_count; 7438b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskispcre2_code *newcode; 7448b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 7458b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisif (code == NULL) return NULL; 7468b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisnewcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data); 7478b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisif (newcode == NULL) return NULL; 7488b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskismemcpy(newcode, code, code->blocksize); 7498b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisnewcode->executable_jit = NULL; 7508b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 7518b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis/* If the code is one that has been deserialized, increment the reference count 7528b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisin the decoded tables. 
*/ 7538b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 7548b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisif ((code->flags & PCRE2_DEREF_TABLES) != 0) 7558b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis { 7568b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis ref_count = (PCRE2_SIZE *)(code->tables + tables_length); 7578b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis (*ref_count)++; 7588b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis } 7598b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 7608b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisreturn newcode; 7618b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis} 7628b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 7638b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 7648b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 7658b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis/************************************************* 76653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Free compiled code * 767f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 768f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 76953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION 77053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispcre2_code_free(pcre2_code *code) 771f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 77253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SIZE* ref_count; 77353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 77453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (code != NULL) 775f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 77653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (code->executable_jit != NULL) 77753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PRIV(jit_free)(code->executable_jit, &code->memctl); 
77853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 77953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((code->flags & PCRE2_DEREF_TABLES) != 0) 78053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 78153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Decoded tables belong to the codes after deserialization, and they must 78253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis be freed when there are no more reference to them. The *ref_count should 78353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis always be > 0. */ 78453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 78553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ref_count = (PCRE2_SIZE *)(code->tables + tables_length); 78653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ref_count > 0) 78753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 78853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (*ref_count)--; 78953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ref_count == 0) 79053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code->memctl.free((void *)code->tables, code->memctl.memory_data); 79153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 79253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 79353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 79453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code->memctl.free(code, code->memctl.memory_data); 795f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 796f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 797f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 798f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 799f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 800f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 80153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Insert an automatic callout point * 
802f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 803f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 80453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called when the PCRE2_AUTO_CALLOUT option is set, to insert 80553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscallout points before each pattern item. 80653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 80753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments: 80853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code current code pointer 80953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr current pattern pointer 81053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb general compile-time data 811f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 81253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: new code pointer 813f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 814f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 81553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic PCRE2_UCHAR * 81653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisauto_callout(PCRE2_UCHAR *code, PCRE2_SPTR ptr, compile_block *cb) 817f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 81853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscode[0] = OP_CALLOUT; 81953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPUT(code, 1, ptr - cb->start_pattern); /* Pattern offset */ 82053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPUT(code, 1 + LINK_SIZE, 0); /* Default length */ 82153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscode[1 + 2*LINK_SIZE] = 255; 82253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn code + PRIV(OP_lengths)[OP_CALLOUT]; 823f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 824f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 
825f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 826f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 827f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 82853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Complete a callout item * 829f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 830f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 83153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* A callout item contains the length of the next item in the pattern, which 83253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswe can't fill in till after we have reached the relevant point. This is used 83353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor both automatic and manual callouts. 834f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 835f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments: 83653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis previous_callout points to previous callout item 83753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr current pattern pointer 83853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb general compile-time data 839f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 84053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: nothing 841f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 842f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 84353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic void 84453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscomplete_callout(PCRE2_UCHAR *previous_callout, PCRE2_SPTR ptr, 84553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis compile_block *cb) 846f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 8478b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskissize_t length = (size_t)(ptr - cb->start_pattern - GET(previous_callout, 1)); 
84853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPUT(previous_callout, 1 + LINK_SIZE, length); 849f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 850f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 851f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 852f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 853f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 85453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Find the fixed length of a branch * 855f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 856f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 85753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Scan a branch and compute the fixed length of subject that will match it, if 85853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe length is fixed. This is needed for dealing with lookbehind assertions. In 85953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisUTF mode, the result is in code units rather than bytes. The branch is 86053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskistemporarily terminated with OP_END when this function is called. 861f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 86253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisThis function is called when a lookbehind assertion is encountered, so that if 86353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisit fails, the error message can point to the correct place in the pattern. 86453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisHowever, we cannot do this when the assertion contains subroutine calls, 86553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbecause they can be forward references. We solve this by remembering this case 86653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisand doing the check at the end; a flag specifies which mode we are running in. 
867f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 86853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisLookbehind lengths are held in 16-bit fields and the maximum value is defined 86953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisas LOOKBEHIND_MAX. 87053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 87153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments: 87253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code points to the start of the pattern (the bracket) 87353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis utf TRUE in UTF mode 87453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis atend TRUE if called when the pattern is complete 87553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb the "compile data" structure 87653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recurses chain of recurse_check to catch mutual recursion 87753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis countptr pointer to counter, to catch over-complexity 87853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 87953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: if non-negative, the fixed length, 88053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis or -1 if an OP_RECURSE item was encountered and atend is FALSE 88153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis or -2 if there is no fixed length, 8828b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis or -3 if \C was encountered (in UTF mode only) 8838b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis or -4 if length is too long 8848b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis or -5 if regex is too complicated 8858b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis or -6 if an unknown opcode was encountered (internal error) 886f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 887f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 
88853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define FFL_LATER (-1) 88953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define FFL_NOTFIXED (-2) 89053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define FFL_BACKSLASHC (-3) 89153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define FFL_TOOLONG (-4) 89253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define FFL_TOOCOMPLICATED (-5) 89353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define FFL_UNKNOWNOP (-6) 89453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 895f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic int 89653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfind_fixedlength(PCRE2_UCHAR *code, BOOL utf, BOOL atend, compile_block *cb, 89753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recurse_check *recurses, int *countptr) 898f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 8998b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisuint32_t length = 0xffffffffu; /* Unset */ 90053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t group = 0; 90153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t groupinfo = 0; 90253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisrecurse_check this_recurse; 9038b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisregister uint32_t branchlength = 0; 90453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister PCRE2_UCHAR *cc = code + 1 + LINK_SIZE; 905f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 90653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* If this is a capturing group, we may have the answer cached, but we can only 90753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuse this information if there are no (?| groups in the pattern, because 90853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisotherwise group numbers are not unique. 
*/ 909f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 91053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (*code == OP_CBRA || *code == OP_CBRAPOS || *code == OP_SCBRA || 91153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *code == OP_SCBRAPOS) 91253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 91353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis group = GET2(cc, 0); 91453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cc += IMM2_SIZE; 91553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis groupinfo = cb->groupinfo[group]; 91653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((cb->external_flags & PCRE2_DUPCAPUSED) == 0) 91753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 91853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((groupinfo & GI_NOT_FIXED_LENGTH) != 0) return FFL_NOTFIXED; 91953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((groupinfo & GI_SET_FIXED_LENGTH) != 0) 92053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return groupinfo & GI_FIXED_LENGTH_MASK; 92153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 92253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 923f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 92453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* A large and/or complex regex can take too long to process. This can happen 92553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismore often when (?| groups are present in the pattern. */ 926f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 92753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((*countptr)++ > 2000) return FFL_TOOCOMPLICATED; 928f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 92953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Scan along the opcodes for this branch. 
If we get to the end of the 93053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbranch, check the length against that of the other branches. */ 931f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 93253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (;;) 933f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 93453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int d; 93553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *ce, *cs; 93653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis register PCRE2_UCHAR op = *cc; 937f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 93853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (branchlength > LOOKBEHIND_MAX) return FFL_TOOLONG; 93953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 94053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis switch (op) 941f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 94253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* We only need to continue for OP_CBRA (normal capturing bracket) and 94353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis OP_BRA (normal non-capturing bracket) because the other variants of these 94453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis opcodes are all concerned with unlimited repeated groups, which of course 94553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis are not of fixed length. 
*/ 946f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 94753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CBRA: 94853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_BRA: 94953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ONCE: 95053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ONCE_NC: 95153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_COND: 95253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis d = find_fixedlength(cc, utf, atend, cb, recurses, countptr); 95353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (d < 0) return d; 9548b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis branchlength += (uint32_t)d; 95553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis do cc += GET(cc, 1); while (*cc == OP_ALT); 95653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cc += 1 + LINK_SIZE; 957f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 958f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 95953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Reached end of a branch; if it's a ket it is the end of a nested call. 96053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis If it's ALT it is an alternation in a nested call. An ACCEPT is effectively 96153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis an ALT. If it is END it's the end of the outer call. All can be handled by 96253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the same code. Note that we must not include the OP_KETRxxx opcodes here, 96353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis because they all imply an unlimited repeat. 
*/ 96453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 96553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ALT: 96653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_KET: 96753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_END: 96853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ACCEPT: 96953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ASSERT_ACCEPT: 9708b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis if (length == 0xffffffffu) length = branchlength; 97153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (length != branchlength) goto ISNOTFIXED; 97253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*cc != OP_ALT) 973f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 97453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (group > 0) 975f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 9768b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis groupinfo |= (uint32_t)(GI_SET_FIXED_LENGTH | length); 97753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->groupinfo[group] = groupinfo; 978f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 9798b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis return (int)length; 980f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 98153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cc += 1 + LINK_SIZE; 98253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis branchlength = 0; 983f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 984f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 98553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* A true recursion implies not fixed length, but a subroutine call may 98653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis be OK. 
If the subroutine is a forward reference, we can't deal with 98753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis it until the end of the pattern, so return FFL_LATER. */ 98853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 98953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_RECURSE: 99053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!atend) return FFL_LATER; 99153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cs = ce = (PCRE2_UCHAR *)cb->start_code + GET(cc, 1); /* Start subpattern */ 99253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */ 99353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (cc > cs && cc < ce) goto ISNOTFIXED; /* Recursion */ 99453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else /* Check for mutual recursion */ 99553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 99653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recurse_check *r = recurses; 99753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break; 99853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (r != NULL) goto ISNOTFIXED; /* Mutual recursion */ 99953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 100053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis this_recurse.prev = recurses; 100153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis this_recurse.group = cs; 100253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis d = find_fixedlength(cs, utf, atend, cb, &this_recurse, countptr); 100353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (d < 0) return d; 10048b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis branchlength += (uint32_t)d; 100553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cc += 1 + LINK_SIZE; 1006f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 
1007f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 100853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Skip over assertive subpatterns. Note that we must increment cc by 100953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 1 + LINK_SIZE at the end, not by OP_length[*cc] because in a recursive 101053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis situation this assertion may be the one that is ultimately being checked 101153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for having a fixed length, in which case its terminating OP_KET will have 101253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis been temporarily replaced by OP_END. */ 1013f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 101453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ASSERT: 101553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ASSERT_NOT: 101653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ASSERTBACK: 101753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ASSERTBACK_NOT: 101853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis do cc += GET(cc, 1); while (*cc == OP_ALT); 101953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cc += 1 + LINK_SIZE; 102053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 1021f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 102253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Skip over things that don't match chars */ 1023f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 102453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MARK: 102553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_PRUNE_ARG: 102653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_SKIP_ARG: 102753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_THEN_ARG: 102853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cc += cc[1] + PRIV(OP_lengths)[*cc]; 
102953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 1030f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1031f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CALLOUT: 1032f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CIRC: 1033f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CIRCM: 1034f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CLOSE: 1035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_COMMIT: 1036f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CREF: 103753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_FALSE: 103853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TRUE: 1039f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_DNCREF: 1040f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_DNRREF: 1041f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_DOLL: 1042f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_DOLLM: 1043f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_EOD: 1044f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_EODN: 1045f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_FAIL: 1046f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOT_WORD_BOUNDARY: 1047f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_PRUNE: 1048f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_REVERSE: 1049f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_RREF: 1050f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_SET_SOM: 1051f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_SKIP: 1052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_SOD: 1053f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_SOM: 1054f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_THEN: 1055f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_WORD_BOUNDARY: 
1056f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cc += PRIV(OP_lengths)[*cc]; 1057f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 1058f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 105953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CALLOUT_STR: 106053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cc += GET(cc, 1 + 2*LINK_SIZE); 106153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 106253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 1063f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle literal characters */ 1064f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1065f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CHAR: 1066f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CHARI: 1067f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOT: 1068f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTI: 1069f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich branchlength++; 1070f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cc += 2; 107153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 1072f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1073f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 1074f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 1075f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1076f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle exact repetitions. The count is already in characters, but we 1077f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich need to skip over a multibyte character in UTF8 mode. 
*/ 1078f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1079f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_EXACT: 1080f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_EXACTI: 1081f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTEXACT: 1082f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTEXACTI: 10838b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis branchlength += GET2(cc,1); 1084f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cc += 2 + IMM2_SIZE; 108553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 1086f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); 1087f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 1088f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 1089f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1090f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_TYPEEXACT: 1091f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich branchlength += GET2(cc,1); 1092f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP) 1093f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cc += 2; 1094f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cc += 1 + IMM2_SIZE + 1; 1095f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 1096f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1097f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle single-char matchers */ 1098f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1099f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_PROP: 1100f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTPROP: 1101f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cc += 2; 1102f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Fall through */ 1103f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich 1104f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_HSPACE: 1105f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_VSPACE: 1106f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOT_HSPACE: 1107f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOT_VSPACE: 1108f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOT_DIGIT: 1109f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_DIGIT: 1110f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOT_WHITESPACE: 1111f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_WHITESPACE: 1112f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOT_WORDCHAR: 1113f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_WORDCHAR: 1114f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_ANY: 1115f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_ALLANY: 1116f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich branchlength++; 1117f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cc++; 1118f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 1119f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 11208b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis /* The single-byte matcher isn't allowed. This only happens in UTF-8 or 11218b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis UTF-16 mode; otherwise \C is coded as OP_ALLANY. 
*/ 1122f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1123f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_ANYBYTE: 112453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return FFL_BACKSLASHC; 1125f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1126f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Check a class for variable quantification */ 1127f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1128f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CLASS: 1129f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NCLASS: 113053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS 1131f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_XCLASS: 1132f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* The original code caused an unsigned overflow in 64 bit systems, 1133f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich so now we use a conditional statement. */ 1134f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (op == OP_XCLASS) 1135f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cc += GET(cc, 1); 1136f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 1137f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cc += PRIV(OP_lengths)[OP_CLASS]; 1138f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#else 1139f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cc += PRIV(OP_lengths)[OP_CLASS]; 1140f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 1141f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1142f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich switch (*cc) 1143f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 1144f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CRSTAR: 1145f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CRMINSTAR: 1146f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CRPLUS: 1147f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich case OP_CRMINPLUS: 1148f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CRQUERY: 1149f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CRMINQUERY: 1150f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CRPOSSTAR: 1151f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CRPOSPLUS: 1152f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CRPOSQUERY: 115353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto ISNOTFIXED; 1154f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1155f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CRRANGE: 1156f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CRMINRANGE: 1157f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CRPOSRANGE: 115853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) goto ISNOTFIXED; 11598b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis branchlength += GET2(cc,1); 1160f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cc += 1 + 2 * IMM2_SIZE; 1161f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 1162f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1163f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich default: 1164f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich branchlength++; 1165f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 1166f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 1167f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1168f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Anything else is variable length */ 1169f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1170f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_ANYNL: 1171f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_BRAMINZERO: 1172f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_BRAPOS: 1173f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case 
OP_BRAPOSZERO: 1174f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_BRAZERO: 1175f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CBRAPOS: 1176f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_EXTUNI: 1177f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_KETRMAX: 1178f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_KETRMIN: 1179f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_KETRPOS: 1180f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_MINPLUS: 1181f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_MINPLUSI: 1182f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_MINQUERY: 1183f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_MINQUERYI: 1184f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_MINSTAR: 1185f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_MINSTARI: 1186f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_MINUPTO: 1187f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_MINUPTOI: 1188f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTMINPLUS: 1189f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTMINPLUSI: 1190f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTMINQUERY: 1191f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTMINQUERYI: 1192f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTMINSTAR: 1193f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTMINSTARI: 1194f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTMINUPTO: 1195f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTMINUPTOI: 1196f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTPLUS: 1197f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTPLUSI: 1198f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTPOSPLUS: 
1199f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTPOSPLUSI: 1200f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTPOSQUERY: 1201f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTPOSQUERYI: 1202f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTPOSSTAR: 1203f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTPOSSTARI: 1204f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTPOSUPTO: 1205f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTPOSUPTOI: 1206f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTQUERY: 1207f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTQUERYI: 1208f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTSTAR: 1209f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTSTARI: 1210f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTUPTO: 1211f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTUPTOI: 1212f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_PLUS: 1213f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_PLUSI: 1214f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_POSPLUS: 1215f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_POSPLUSI: 1216f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_POSQUERY: 1217f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_POSQUERYI: 1218f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_POSSTAR: 1219f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_POSSTARI: 1220f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_POSUPTO: 1221f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_POSUPTOI: 1222f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_QUERY: 1223f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_QUERYI: 1224f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case 
OP_REF: 1225f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_REFI: 1226f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_DNREF: 1227f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_DNREFI: 1228f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_SBRA: 1229f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_SBRAPOS: 1230f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_SCBRA: 1231f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_SCBRAPOS: 1232f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_SCOND: 1233f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_SKIPZERO: 1234f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_STAR: 1235f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_STARI: 1236f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_TYPEMINPLUS: 1237f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_TYPEMINQUERY: 1238f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_TYPEMINSTAR: 1239f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_TYPEMINUPTO: 1240f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_TYPEPLUS: 1241f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_TYPEPOSPLUS: 1242f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_TYPEPOSQUERY: 1243f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_TYPEPOSSTAR: 1244f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_TYPEPOSUPTO: 1245f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_TYPEQUERY: 1246f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_TYPESTAR: 1247f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_TYPEUPTO: 1248f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_UPTO: 1249f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_UPTOI: 125053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto 
ISNOTFIXED; 1251f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1252f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Catch unrecognized opcodes so that when new ones are added they 1253f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich are not forgotten, as has happened in the past. */ 1254f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1255f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich default: 125653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return FFL_UNKNOWNOP; 125753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 125853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 125953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Control never gets here except by goto. */ 126053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 126153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisISNOTFIXED: 126253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (group > 0) 126353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 126453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis groupinfo |= GI_NOT_FIXED_LENGTH; 126553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->groupinfo[group] = groupinfo; 126653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 126753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn FFL_NOTFIXED; 126853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis} 126953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 127053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 127153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 127253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/************************************************* 127353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Find first significant op code * 127453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/ 127553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 
127653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This is called by several functions that scan a compiled expression looking 127753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor a fixed first character, or an anchoring op code etc. It skips over things 127853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthat do not influence this. For some calls, it makes sense to skip negative 127953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisforward and all backward assertions, and also the \b assertion; for others it 128053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisdoes not. 128153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 128253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments: 128353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code pointer to the start of the group 128453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis skipassert TRUE if certain assertions are to be skipped 128553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 128653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: pointer to the first significant opcode 128753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/ 128853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 128953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR* 129053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfirst_significant_code(PCRE2_SPTR code, BOOL skipassert) 129153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{ 129253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (;;) 129353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 129453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis switch ((int)*code) 129553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 129653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ASSERT_NOT: 129753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ASSERTBACK: 
129853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ASSERTBACK_NOT: 129953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!skipassert) return code; 130053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis do code += GET(code, 1); while (*code == OP_ALT); 130153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code += PRIV(OP_lengths)[*code]; 130253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 130353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 130453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_WORD_BOUNDARY: 130553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOT_WORD_BOUNDARY: 130653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!skipassert) return code; 130753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Fall through */ 130853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 130953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CALLOUT: 131053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CREF: 131153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_DNCREF: 131253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_RREF: 131353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_DNRREF: 131453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_FALSE: 131553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TRUE: 131653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code += PRIV(OP_lengths)[*code]; 131753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 131853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 131953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CALLOUT_STR: 132053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code += GET(code, 1 + 2*LINK_SIZE); 132153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 132253e448c1797913dbc12dbacc341559934566d6bcJanis 
Danisevskis 132353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis default: 132453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return code; 1325f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 1326f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 132753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Control never reaches here */ 1328f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 1329f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1330f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1331f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 133253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/************************************************* 133353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Scan compiled branch for non-emptiness * 133453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/ 133553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 133653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function scans through a branch of a compiled pattern to see whether it 133753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscan match the empty string. It is called at the end of compiling to check the 133853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisentire pattern, and from compile_branch() when checking for an unlimited repeat 133953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisof a group that can match nothing. In the latter case it is called only when 134053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisdoing the real compile, not during the pre-compile that measures the size of 134153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe compiled pattern. 
134253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 134353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisNote that first_significant_code() skips over backward and negative forward 134453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisassertions when its final argument is TRUE. If we hit an unclosed bracket, we 134553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn "empty" - this means we've struck an inner bracket whose current branch 134653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswill already have been scanned. 134753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 134853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments: 134953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code points to start of search 135053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis endcode points to where to stop 135153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis utf TRUE if in UTF mode 135253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb compile data 135353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis atend TRUE if being called to check an entire pattern 135453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recurses chain of recurse_check to catch mutual recursion 135553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis countptr pointer to count to catch over-complicated pattern 135653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 135753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: 0 if what is matched cannot be empty 135853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 1 if what is matched could be empty 135953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis -1 if the pattern is too complicated 136053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/ 136153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 136253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define CBE_NOTEMPTY 0 
136353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define CBE_EMPTY 1 136453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define CBE_TOOCOMPLICATED (-1) 136553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 136653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 136753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic int 136853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscould_be_empty_branch(PCRE2_SPTR code, PCRE2_SPTR endcode, BOOL utf, 136953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis compile_block *cb, BOOL atend, recurse_check *recurses, int *countptr) 137053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{ 137153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t group = 0; 137253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t groupinfo = 0; 137353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister PCRE2_UCHAR c; 137453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisrecurse_check this_recurse; 137553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 137653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* If what we are checking has already been set as "could be empty", we know 137753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe answer. */ 137853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 137953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (*code >= OP_SBRA && *code <= OP_SCOND) return CBE_EMPTY; 138053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 138153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* If this is a capturing group, we may have the answer cached, but we can only 138253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuse this information if there are no (?| groups in the pattern, because 138353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisotherwise group numbers are not unique. 
*/ 138453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 138553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((cb->external_flags & PCRE2_DUPCAPUSED) == 0 && 138653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (*code == OP_CBRA || *code == OP_CBRAPOS)) 138753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 138853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis group = GET2(code, 1 + LINK_SIZE); 138953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis groupinfo = cb->groupinfo[group]; 139053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((groupinfo & GI_SET_COULD_BE_EMPTY) != 0) 139153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return ((groupinfo & GI_COULD_BE_EMPTY) != 0)? CBE_EMPTY : CBE_NOTEMPTY; 139253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 139353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 139453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* A large and/or complex regex can take too long to process. We have to assume 139553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisit can match an empty string. This can happen more often when (?| groups are 139653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispresent in the pattern and the caching is disabled. Setting the cap at 1100 139753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisallows the test for more than 1023 capturing patterns to work. */ 139853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 139953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((*countptr)++ > 1100) return CBE_TOOCOMPLICATED; 140053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 140153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Scan the opcodes for this branch. 
*/ 140253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 140353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); 140453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code < endcode; 140553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE)) 140653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 140753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR ccode; 140853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 140953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = *code; 141053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 141153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Skip over forward assertions; the other assertions are skipped by 141253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis first_significant_code() with a TRUE final argument. */ 141353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 141453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == OP_ASSERT) 141553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 141653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis do code += GET(code, 1); while (*code == OP_ALT); 141753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = *code; 141853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis continue; 141953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 142053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 142153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* For a recursion/subroutine call we can scan the recursion when this 142253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis function is called at the end, to check a complete pattern. 
Before then, 142353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recursions just have the group number as their argument and in any case may 142453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis be forward references. In that situation, we return CBE_EMPTY, just in case. 142553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis It means that unlimited repeats of groups that contain recursions are always 142653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis treated as "could be empty" - which just adds a bit more processing time 142753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis because of the runtime check. */ 142853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 142953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == OP_RECURSE) 143053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 143153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR scode, endgroup; 143253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis BOOL empty_branch; 1433f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 143453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!atend) goto ISTRUE; 143553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis scode = cb->start_code + GET(code, 1); 143653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis endgroup = scode; 1437f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 143853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* We need to detect whether this is a recursive call, as otherwise there 143953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis will be an infinite loop. If it is a recursion, just skip over it. Simple 144053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recursions are easily detected. For mutual recursions we keep a chain on 144153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the stack. 
*/ 1442f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 144353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT); 144453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (code >= scode && code <= endgroup) continue; /* Simple recursion */ 144553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 144653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 144753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recurse_check *r = recurses; 144853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (r = recurses; r != NULL; r = r->prev) 144953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (r->group == scode) break; 145053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (r != NULL) continue; /* Mutual recursion */ 145153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 1452f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 145353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Scan the referenced group, remembering it on the stack chain to detect 145453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis mutual recursions. 
*/ 1455f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 145653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis empty_branch = FALSE; 145753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis this_recurse.prev = recurses; 145853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis this_recurse.group = scode; 1459f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 146053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis do 146153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 146253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int rc = could_be_empty_branch(scode, endcode, utf, cb, atend, 146353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis &this_recurse, countptr); 146453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (rc < 0) return rc; 146553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (rc > 0) 146653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 146753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis empty_branch = TRUE; 146853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 146953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 147053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis scode += GET(scode, 1); 147153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 147253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (*scode == OP_ALT); 1473f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 147453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!empty_branch) goto ISFALSE; /* All branches are non-empty */ 147553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis continue; 147653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 1477f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 147853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Groups with zero repeats can of course be empty; skip them. 
*/ 1479f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 148053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO || 148153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c == OP_BRAPOSZERO) 1482f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 1483f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += PRIV(OP_lengths)[c]; 148453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis do code += GET(code, 1); while (*code == OP_ALT); 148553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = *code; 148653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis continue; 1487f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 1488f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 148953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* A nested group that is already marked as "could be empty" can just be 149053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis skipped. */ 1491f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 149253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == OP_SBRA || c == OP_SBRAPOS || 149353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c == OP_SCBRA || c == OP_SCBRAPOS) 1494f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 149553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis do code += GET(code, 1); while (*code == OP_ALT); 149653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = *code; 149753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis continue; 1498f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 1499f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 150053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* For other groups, scan the branches. 
*/ 1501f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 150253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == OP_BRA || c == OP_BRAPOS || 150353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c == OP_CBRA || c == OP_CBRAPOS || 150453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c == OP_ONCE || c == OP_ONCE_NC || 150553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c == OP_COND || c == OP_SCOND) 1506f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 150753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis BOOL empty_branch; 150853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (GET(code, 1) == 0) goto ISTRUE; /* Hit unclosed bracket */ 1509f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 151053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If a conditional group has only one branch, there is a second, implied, 151153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis empty branch, so just skip over the conditional, because it could be empty. 151253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis Otherwise, scan the individual branches of the group. 
*/ 1513f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 151453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == OP_COND && code[GET(code, 1)] != OP_ALT) 151553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code += GET(code, 1); 151653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 151753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 151853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis empty_branch = FALSE; 151953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis do 152053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 152153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!empty_branch) 152253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 152353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int rc = could_be_empty_branch(code, endcode, utf, cb, atend, 152453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recurses, countptr); 152553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (rc < 0) return rc; 152653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (rc > 0) empty_branch = TRUE; 152753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 152853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code += GET(code, 1); 152953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 153053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (*code == OP_ALT); 153153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!empty_branch) goto ISFALSE; /* All branches are non-empty */ 1532f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 1533f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 153453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = *code; 153553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis continue; 153653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 1537f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 153853e448c1797913dbc12dbacc341559934566d6bcJanis 
Danisevskis /* Handle the other opcodes */ 153953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 154053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis switch (c) 154153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 154253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Check for quantifiers after a class. XCLASS is used for classes that 154353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cannot be represented just by a bit map. This includes negated single 154453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis high-valued characters. The length in PRIV(OP_lengths)[] is zero; the 154553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis actual length is stored in the compiled code, so we must update "code" 154653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis here. */ 1547f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 154853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 154953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_XCLASS: 155053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ccode = code += GET(code, 1); 155153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto CHECK_CLASS_REPEAT; 155253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 1553f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 155453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CLASS: 155553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NCLASS: 155653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ccode = code + PRIV(OP_lengths)[OP_CLASS]; 155753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 155853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 155953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis CHECK_CLASS_REPEAT: 156053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 
156153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 156253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis switch (*ccode) 1563f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 156453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CRSTAR: /* These could be empty; continue */ 156553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CRMINSTAR: 156653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CRQUERY: 156753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CRMINQUERY: 156853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CRPOSSTAR: 156953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CRPOSQUERY: 157053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 157153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 157253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis default: /* Non-repeat => class must match */ 157353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CRPLUS: /* These repeats aren't empty */ 157453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CRMINPLUS: 157553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CRPOSPLUS: 157653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto ISFALSE; 157753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 157853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CRRANGE: 157953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CRMINRANGE: 158053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CRPOSRANGE: 158153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (GET2(ccode, 1) > 0) goto ISFALSE; /* Minimum > 0 */ 1582f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 1583f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 158453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 158553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 
158653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Opcodes that must match a character */ 158753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 158853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ANY: 158953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ALLANY: 159053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ANYBYTE: 159153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 159253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_PROP: 159353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPROP: 159453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ANYNL: 159553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 159653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOT_HSPACE: 159753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_HSPACE: 159853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOT_VSPACE: 159953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_VSPACE: 160053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_EXTUNI: 160153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 160253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOT_DIGIT: 160353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_DIGIT: 160453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOT_WHITESPACE: 160553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_WHITESPACE: 160653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOT_WORDCHAR: 160753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_WORDCHAR: 160853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 160953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CHAR: 161053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CHARI: 161153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOT: 
161253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTI: 161353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 161453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_PLUS: 161553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_PLUSI: 161653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINPLUS: 161753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINPLUSI: 161853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 161953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPLUS: 162053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPLUSI: 162153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINPLUS: 162253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINPLUSI: 162353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 162453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSPLUS: 162553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSPLUSI: 162653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSPLUS: 162753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSPLUSI: 1628f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 162953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_EXACT: 163053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_EXACTI: 163153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTEXACT: 163253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTEXACTI: 1633f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 163453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEPLUS: 163553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEMINPLUS: 163653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEPOSPLUS: 163753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEEXACT: 
163853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto ISFALSE; 1639f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 164053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* These are going to continue, as they may be empty, but we have to 164153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis fudge the length for the \p and \P cases. */ 1642f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 164353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPESTAR: 164453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEMINSTAR: 164553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEPOSSTAR: 164653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEQUERY: 164753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEMINQUERY: 164853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEPOSQUERY: 164953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; 165053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 1651f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 165253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Same for these */ 1653f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 165453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEUPTO: 165553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEMINUPTO: 165653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEPOSUPTO: 165753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) 165853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code += 2; 165953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 1660f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 166153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* End of branch */ 
1662f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 166353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_KET: 166453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_KETRMAX: 166553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_KETRMIN: 166653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_KETRPOS: 166753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_ALT: 166853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto ISTRUE; 1669f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 167053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* In UTF-8 or UTF-16 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, 167153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis POSQUERY, UPTO, MINUPTO, and POSUPTO and their caseless and negative 167253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis versions may be followed by a multibyte character. */ 1673f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 167453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef MAYBE_UTF_MULTI 167553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_STAR: 167653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_STARI: 167753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTSTAR: 167853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTSTARI: 1679f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 168053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINSTAR: 168153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINSTARI: 168253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINSTAR: 168353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINSTARI: 1684f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 168553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSSTAR: 168653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 
case OP_POSSTARI: 168753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSSTAR: 168853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSSTARI: 1689f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 169053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_QUERY: 169153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_QUERYI: 169253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTQUERY: 169353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTQUERYI: 1694f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 169553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINQUERY: 169653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINQUERYI: 169753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINQUERY: 169853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINQUERYI: 1699f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 170053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSQUERY: 170153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSQUERYI: 170253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSQUERY: 170353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSQUERYI: 170453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]); 170553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 1706f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 170753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_UPTO: 170853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_UPTOI: 170953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTUPTO: 171053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTUPTOI: 1711f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 
171253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINUPTO: 171353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINUPTOI: 171453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINUPTO: 171553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINUPTOI: 171653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 171753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSUPTO: 171853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSUPTOI: 171953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSUPTO: 172053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSUPTOI: 172153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]); 172253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 172353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* MAYBE_UTF_MULTI */ 172453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 172553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* MARK, and PRUNE/SKIP/THEN with an argument must skip over the argument 172653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis string. 
*/ 172753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 172853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MARK: 172953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_PRUNE_ARG: 173053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_SKIP_ARG: 173153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_THEN_ARG: 173253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code += code[1]; 173353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 173453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 173553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* None of the remaining opcodes are required to match a character. */ 173653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 173753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis default: 173853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 1739f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 1740f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 174153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 174253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisISTRUE: 174353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisgroupinfo |= GI_COULD_BE_EMPTY; 174453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 174553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisISFALSE: 174653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (group > 0) cb->groupinfo[group] = groupinfo | GI_SET_COULD_BE_EMPTY; 174753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 174853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn ((groupinfo & GI_COULD_BE_EMPTY) != 0)? 
CBE_EMPTY : CBE_NOTEMPTY; 1749f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 1750f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1751f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1752f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1753f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 175453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Check for counted repeat * 1755f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 1756f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 175753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called when a '{' is encountered in a place where it might 175853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstart a quantifier. It looks ahead to see if it really is a quantifier, that 175953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisis, one of the forms {ddd} {ddd,} or {ddd,ddd} where the ddds are digits. 
1760f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 176153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArgument: pointer to the first char after '{' 176253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: TRUE or FALSE 1763f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 1764f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1765f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic BOOL 176653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisis_counted_repeat(PCRE2_SPTR p) 1767f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 176853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (!IS_DIGIT(*p)) return FALSE; 176953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisp++; 177053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhile (IS_DIGIT(*p)) p++; 177153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE; 1772f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 177353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (*p++ != CHAR_COMMA) return FALSE; 177453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE; 1775f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 177653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (!IS_DIGIT(*p)) return FALSE; 177753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisp++; 177853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhile (IS_DIGIT(*p)) p++; 1779f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 178053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn (*p == CHAR_RIGHT_CURLY_BRACKET); 178153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis} 1782f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1783f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1784f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 178553e448c1797913dbc12dbacc341559934566d6bcJanis 
Danisevskis/************************************************* 178653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Handle escapes * 178753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/ 178853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 178953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called when a \ has been encountered. It either returns a 179053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispositive value for a simple escape such as \d, or 0 for a data character, which 179153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisis placed in chptr. A backreference to group n is returned as negative n. On 179253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisentry, ptr is pointing at the \. On exit, it points to the final code unit of the 179353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisescape sequence. 1794f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 179553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisThis function is also called from pcre2_substitute() to handle escape sequences 179653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisin replacement strings. In this case, the cb argument is NULL, and only 179753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissequences that define a data character are recognised. The isclass argument is 179853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnot relevant, but the options argument is the final value of the compiled 179953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispattern's options. 180053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 180153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisThere is one "trick" case: when a sequence such as [[:>:]] or \s in UCP mode is 180253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisprocessed, it is replaced by a nested alternative sequence.
If this contains a 180353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbackslash (which it usually does), ptrend does not point to its end - it still 180453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispoints to the end of the whole pattern. However, we can detect this case 180553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbecause cb->nestptr[0] will be non-NULL. The nested sequences are all zero- 180653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisterminated and there are only ever two levels of nesting. 180753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 180853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments: 180953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptrptr points to the input position pointer 181053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptrend points to the end of the input 181153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis chptr points to a returned data character 181253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcodeptr points to the errorcode variable (containing zero) 181353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis options the current options bits 181453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis isclass TRUE if inside a character class 181553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb compile data block 181653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 181753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: zero => a data character 181853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis positive => a special escape sequence 181953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis negative => a back reference 182053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis on error, errorcodeptr is set non-zero 182153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/ 1822f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
182353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint 182453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr, 182553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int *errorcodeptr, uint32_t options, BOOL isclass, compile_block *cb) 182653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{ 182753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL utf = (options & PCRE2_UTF) != 0; 182853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR ptr = *ptrptr + 1; 182953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister uint32_t c, cc; 183053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint escape = 0; 183153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint i; 1832f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 183353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Find the end of a nested insert. */ 1834f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 183553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (cb != NULL && cb->nestptr[0] != NULL) 183653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptrend = ptr + PRIV(strlen)(ptr); 1837f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 183853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* If backslash is at the end of the string, it's an error. 
*/ 1839f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 184053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (ptr >= ptrend) 184153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 184253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR1; 184353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return 0; 184453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 1845f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 184653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisGETCHARINCTEST(c, ptr); /* Get character value, increment pointer */ 184753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisptr--; /* Set pointer back to the last code unit */ 1848f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 184953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Non-alphanumerics are literals, so we just leave the value in c. An initial 185053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisvalue test saves a memory lookup for code points outside the alphanumeric 185153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisrange. Otherwise, do a table lookup. A non-zero result is something that can be 185253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturned immediately. Otherwise further processing is required. 
*/ 1853f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 185453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (c < ESCAPES_FIRST || c > ESCAPES_LAST) {} /* Definitely literal */ 1855f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 185653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiselse if ((i = escapes[c - ESCAPES_FIRST]) != 0) 185753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 185853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (i > 0) c = (uint32_t)i; else /* Positive is a data character */ 1859f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 186053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis escape = -i; /* Else return a special escape */ 186153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (escape == ESC_P || escape == ESC_p || escape == ESC_X) 186253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->external_flags |= PCRE2_HASBKPORX; /* Note \P, \p, or \X */ 1863f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 186453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 1865f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 186653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Escapes that need further processing, including those that are unknown. 186753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisWhen called from pcre2_substitute(), only \c, \o, and \x are recognized (and \u 186853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhen BSUX is set). 
*/ 1869f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 187053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiselse 187153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 187253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR oldptr; 187353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis BOOL braced, negated, overflow; 187453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis unsigned int s; 187553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 187653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Filter calls from pcre2_substitute(). */ 187753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 187853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (cb == NULL && c != CHAR_c && c != CHAR_o && c != CHAR_x && 187953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (c != CHAR_u || (options & PCRE2_ALT_BSUX) != 0)) 1880f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 188153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR3; 188253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return 0; 1883f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 1884f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 188553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis switch (c) 1886f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 188753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* A number of Perl escapes are not handled by PCRE. We give an explicit 188853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis error. 
*/ 1889f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 189053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_l: 189153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_L: 189253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR37; 189353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 1894f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 189553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* \u is unrecognized when PCRE2_ALT_BSUX is not set. When it is treated 189653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis specially, \u must be followed by four hex digits. Otherwise it is a 189753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis lowercase u letter. */ 189853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 189953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_u: 190053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_ALT_BSUX) == 0) *errorcodeptr = ERR37; else 1901f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 190253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t xc; 190353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((cc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */ 190453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((xc = XDIGIT(ptr[2])) == 0xff) break; /* Not a hex digit */ 190553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cc = (cc << 4) | xc; 190653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((xc = XDIGIT(ptr[3])) == 0xff) break; /* Not a hex digit */ 190753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cc = (cc << 4) | xc; 190853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((xc = XDIGIT(ptr[4])) == 0xff) break; /* Not a hex digit */ 190953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = (cc << 4) | xc; 191053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 4; 
191153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf) 1912f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 191353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c > 0x10ffffU) *errorcodeptr = ERR77; 191453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; 1915f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 191653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77; 1917f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 191853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 1919f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 192053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_U: 192153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* \U is unrecognized unless PCRE2_ALT_BSUX is set, in which case it is an 192253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis upper case letter. */ 192353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_ALT_BSUX) == 0) *errorcodeptr = ERR37; 192453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 1925f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 192653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* In a character class, \g is just a literal "g". Outside a character 192753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis class, \g must be followed by one of a number of specific things: 1928f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 192953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (1) A number, either plain or braced. If positive, it is an absolute 193053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis backreference. If negative, it is a relative backreference. This is a Perl 193153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 5.10 feature. 
1932f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 193353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (2) Perl 5.10 also supports \g{name} as a reference to a named group. This 193453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis is part of Perl's movement towards a unified syntax for back references. As 193553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis this is synonymous with \k{name}, we fudge it up by pretending it really 193653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis was \k. 1937f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 193853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (3) For Oniguruma compatibility we also support \g followed by a name or a 193953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis number either in angle brackets or in single quotes. However, these are 194053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (possibly recursive) subroutine calls, _not_ backreferences. Just return 194153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the ESC_g code (cf \k). 
*/ 1942f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 194353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_g: 194453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (isclass) break; 194553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE) 194653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 194753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis escape = ESC_g; 194853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 194953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 1950f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 195153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Handle the Perl-compatible cases */ 195253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 195353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) 1954f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 195553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR p; 195653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (p = ptr+2; *p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET; p++) 195753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break; 195853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET) 195953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 196053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis escape = ESC_k; 196153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 196253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 196353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis braced = TRUE; 196453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 196553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 196653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else braced = 
FALSE; 196753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 196853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[1] == CHAR_MINUS) 196953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 197053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis negated = TRUE; 197153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 197253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 197353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else negated = FALSE; 197453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 197553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* The integer range is limited by the machine's int representation. */ 197653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis s = 0; 197753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis overflow = FALSE; 197853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (IS_DIGIT(ptr[1])) 197953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 198053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (s > INT_MAX / 10 - 1) /* Integer overflow */ 198153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 198253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis overflow = TRUE; 198353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 198453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 19858b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis s = s * 10 + (unsigned int)(*(++ptr) - CHAR_0); 198653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 198753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (overflow) /* Integer overflow */ 198853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 198953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (IS_DIGIT(ptr[1])) ptr++; 199053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR61; 1991f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 
199253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 1993f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 199453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET) 199553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 199653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR57; 199753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 199853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 1999f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 200053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (s == 0) 200153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 200253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR58; 2003f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 2004f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 2005f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 200653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (negated) 200753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 200853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (s > cb->bracount) 200953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 201053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR15; 201153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 201253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 201353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis s = cb->bracount - (s - 1); 201453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2015f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 201653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis escape = -(int)s; 201753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 2018f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 201953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* The handling of 
escape sequences consisting of a string of digits 202053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis starting with one that is not zero is not straightforward. Perl has changed 202153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis over the years. Nowadays \g{} for backreferences and \o{} for octal are 202253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recommended to avoid the ambiguities in the old syntax. 2023f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 202453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis Outside a character class, the digits are read as a decimal number. If the 202553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis number is less than 10, or if there are that many previous extracting left 202653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis brackets, it is a back reference. Otherwise, up to three octal digits are 202753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis read to form an escaped character code. Thus \123 is likely to be octal 123 202853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (cf \0123, which is octal 012 followed by the literal 3). 2029f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 203053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis Inside a character class, \ followed by a digit is always either a literal 203153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 8 or 9 or an octal number. 
*/ 2032f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 203353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5: 203453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: 2035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 203653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!isclass) 203753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 203853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis oldptr = ptr; 203953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* The integer range is limited by the machine's int representation. */ 204053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis s = c - CHAR_0; 204153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis overflow = FALSE; 204253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (IS_DIGIT(ptr[1])) 204353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 204453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (s > INT_MAX / 10 - 1) /* Integer overflow */ 204553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 204653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis overflow = TRUE; 204753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 204853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 20498b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis s = s * 10 + (unsigned int)(*(++ptr) - CHAR_0); 205053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 205153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (overflow) /* Integer overflow */ 205253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 205353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (IS_DIGIT(ptr[1])) ptr++; 205453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR61; 205553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 
205653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2057f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 205853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x 205953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis are octal escapes if there are not that many previous captures. */ 2060f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 206153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (s < 10 || *oldptr >= CHAR_8 || s <= cb->bracount) 206253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 206353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis escape = -(int)s; /* Indicates a back reference */ 206453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 206553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 206653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr = oldptr; /* Put the pointer back and fall through */ 206753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2068f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 206953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Handle a digit following \ when the number is not a back reference, or 207053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis we are within a character class. If the first digit is 8 or 9, Perl used to 207153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis generate a binary zero byte and then treat the digit as a following 207253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis literal. At least by Perl 5.18 this changed so as not to insert the binary 207353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zero. 
*/ 2074f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 207553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((c = *ptr) >= CHAR_8) break; 2076f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 207753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Fall through with a digit less than 8 */ 2078f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 207953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* \0 always starts an octal number, but we may drop through to here with a 208053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis larger first octal digit. The original code used just to take the least 208153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis significant 8 bits of octal numbers (I think this is what early Perls used 208253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode, 208353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis but no more than 3 octal digits. 
*/ 2084f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 208553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_0: 208653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c -= CHAR_0; 208753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7) 208853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = c * 8 + *(++ptr) - CHAR_0; 208953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 8 209053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!utf && c > 0xff) *errorcodeptr = ERR51; 209153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 2092f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 2093f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 209453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* \o is a relatively new Perl feature, supporting a more general way of 209553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis specifying character codes in octal. The only supported form is \o{ddd}. 
*/ 2096f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 209753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_o: 209853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR55; else 209953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR78; else 210053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 210153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 2; 210253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = 0; 210353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis overflow = FALSE; 210453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (*ptr >= CHAR_0 && *ptr <= CHAR_7) 210553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 210653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cc = *ptr++; 210753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ 210853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 32 210953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c >= 0x20000000l) { overflow = TRUE; break; } 211053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 211153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = (c << 3) + (cc - CHAR_0); 211253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 8 211353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; } 211453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#elif PCRE2_CODE_UNIT_WIDTH == 16 211553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c > (utf ? 
0x10ffffU : 0xffffU)) { overflow = TRUE; break; } 211653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#elif PCRE2_CODE_UNIT_WIDTH == 32 211753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf && c > 0x10ffffU) { overflow = TRUE; break; } 211853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 211953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 212053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (overflow) 212153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 212253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (*ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++; 212353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR34; 212453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 212553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (*ptr == CHAR_RIGHT_CURLY_BRACKET) 212653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 212753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; 212853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 212953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else *errorcodeptr = ERR64; 213053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2131f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 2132f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 213353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* \x is complicated. When PCRE2_ALT_BSUX is set, \x must be followed by 213453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis two hexadecimal digits. Otherwise it is a lowercase x letter. 
*/ 2135f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 213653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_x: 213753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_ALT_BSUX) != 0) 213853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 213953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t xc; 214053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((cc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */ 214153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((xc = XDIGIT(ptr[2])) == 0xff) break; /* Not a hex digit */ 214253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = (cc << 4) | xc; 214353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 2; 214453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } /* End PCRE2_ALT_BSUX handling */ 2145f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 214653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Handle \x in Perl's style. \x{ddd} is a character number which can be 214753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis greater than 0xff in UTF-8 or non-8bit mode, but only if the ddd are hex 214853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis digits. If not, { used to be treated as a data character. However, Perl 214953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis seems to read hex digits up to the first non-such, and ignore the rest, so 215053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis that, for example \x{zz} matches a binary zero. This seems crazy, so PCRE 215153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis now gives an error. 
*/ 2152f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 215353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 215453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 215553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) 215653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 215753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 2; 215853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr == CHAR_RIGHT_CURLY_BRACKET) 215953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 216053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR78; 216153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 216253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 216353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = 0; 216453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis overflow = FALSE; 2165f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 216653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while ((cc = XDIGIT(*ptr)) != 0xff) 216753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 216853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 216953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == 0 && cc == 0) continue; /* Leading zeroes */ 217053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 32 217153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c >= 0x10000000l) { overflow = TRUE; break; } 217253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 217353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = (c << 4) | cc; 217453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((utf && c > 0x10ffffU) || (!utf && c > MAX_NON_UTF_CHAR)) 217553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 217653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis overflow = TRUE; 
217753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 217853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 217953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2180f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 218153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (overflow) 218253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 218353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (XDIGIT(*ptr) != 0xff) ptr++; 218453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR34; 218553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 218653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (*ptr == CHAR_RIGHT_CURLY_BRACKET) 218753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 218853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; 218953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2190f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 219153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If the sequence of hex digits does not end with '}', give an error. 219253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis We used just to recognize this construct and fall through to the normal 219353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis \x handling, but nowadays Perl gives an error, which seems much more 219453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis sensible, so we do too. 
*/ 2195f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 219653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else *errorcodeptr = ERR67; 219753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } /* End of \x{} processing */ 2198f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 219953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Read a single-byte hex-defined char (up to two hex digits after \x) */ 2200f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 220153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 220253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 220353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = 0; 220453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((cc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */ 220553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 220653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = cc; 220753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((cc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */ 220853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 220953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = (c << 4) | cc; 221053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } /* End of \xdd handling */ 221153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } /* End of Perl-style \x handling */ 2212f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 2213f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 221453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* The handling of \c is different in ASCII and EBCDIC environments. In an 221553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ASCII (or Unicode) environment, an error is given if the character 221653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis following \c is not a printable ASCII character. 
Otherwise, the following 221753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis character is upper-cased if it is a letter, and after that the 0x40 bit is 221853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis flipped. The result is the value of the escape. 2219f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 222053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis In an EBCDIC environment the handling of \c is compatible with the 222153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis specification in the perlebcdic document. The following character must be 222253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis a letter or one of small number of special characters. These provide a 222353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis means of defining the character values 0-31. 2224f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 222553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis For testing the EBCDIC handling of \c in an ASCII environment, recognize 222653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the EBCDIC value of 'c' explicitly. 
*/ 2227f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 222853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if defined EBCDIC && 'a' != 0x81 222953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case 0x83: 223053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else 223153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_c: 2232f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 2233f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 223453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = *(++ptr); 223553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c >= CHAR_a && c <= CHAR_z) c = UPPER_CASE(c); 223653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_NULL && ptr >= ptrend) 223753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 223853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR2; 223953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 224053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2241f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 224253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Handle \c in an ASCII/Unicode environment. 
*/ 2243f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 224453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifndef EBCDIC /* ASCII/UTF-8 coding */ 224553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c < 32 || c > 126) /* Excludes all non-printable ASCII */ 224653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 224753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR68; 224853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 224953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 225053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c ^= 0x40; 2251f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 225253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Handle \c in an EBCDIC environment. The special case \c? is converted to 225353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 255 (0xff) or 95 (0x5f) if other character suggest we are using th POSIX-BC 225453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis encoding. (This is the way Perl indicates that it handles \c?.) The other 225553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis valid sequences correspond to a list of specific characters. */ 2256f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 225753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else 225853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_QUESTION_MARK) 225953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = ('\\' == 188 && '`' == 74)? 
0x5f : 0xff; 226053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 226153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 226253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 0; i < 32; i++) 226353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 226453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == ebcdic_escape_c[i]) break; 226553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 226653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (i < 32) c = i; else *errorcodeptr = ERR68; 226753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 226853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* EBCDIC */ 2269f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 227053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 2271f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 227253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Any other alphanumeric following \ is an error. Perl gives an error only 227353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if in warning mode, but PCRE doesn't have a warning mode. */ 2274f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 227553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis default: 227653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR3; 227753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 227853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2279f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 2280f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 228153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Perl supports \N{name} for character names, as well as plain \N for "not 228253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnewline". PCRE does not support \N{name}. However, it does support 228353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisquantification such as \N{2,3}. 
*/ 2284f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 228553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (escape == ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET && 228653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis !is_counted_repeat(ptr+2)) 228753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR37; 2288f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 228953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* If PCRE2_UCP is set, we change the values for \d etc. */ 2290f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 229153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((options & PCRE2_UCP) != 0 && escape >= ESC_D && escape <= ESC_w) 229253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis escape += (ESC_DU - ESC_D); 2293f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 229453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Set the pointer to the final character before returning. */ 2295f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 229653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*ptrptr = ptr; 229753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*chptr = c; 229853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn escape; 2299f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 2300f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2301f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2302f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 230353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 2304f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 230553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Handle \P and \p * 2306f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 2307f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 
230853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called after \P or \p has been encountered, provided that 230953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2 is compiled with support for UTF and Unicode properties. On entry, the 231053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscontents of ptrptr are pointing at the P or p. On exit, it is left pointing at 231153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe final code unit of the escape sequence. 2312f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2313f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments: 231453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptrptr the pattern position pointer 231553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis negptr a boolean that is set TRUE for negation else FALSE 231653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptypeptr an unsigned int that is set to the type value 231753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis pdataptr an unsigned int that is set to the detailed property value 231853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcodeptr the error code variable 231953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb the compile data 2320f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 232153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: TRUE if the type value was found, or FALSE for an invalid type 2322f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 2323f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2324f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic BOOL 232553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisget_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, unsigned int *ptypeptr, 232653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis unsigned int *pdataptr, int *errorcodeptr, compile_block *cb) 2327f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 
232853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister PCRE2_UCHAR c; 23298b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskissize_t i, bot, top; 233053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR ptr = *ptrptr; 233153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR name[32]; 2332f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 233353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*negptr = FALSE; 233453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisc = *(++ptr); 2335f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 233653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* \P or \p can be followed by a name in {}, optionally preceded by ^ for 233753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnegation. */ 2338f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 233953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (c == CHAR_LEFT_CURLY_BRACKET) 234053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 234153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT) 234253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 234353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *negptr = TRUE; 234453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 234553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 234653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++) 234753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 234853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = *(++ptr); 234953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_NULL) goto ERROR_RETURN; 235053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_RIGHT_CURLY_BRACKET) break; 235153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis name[i] = c; 
235253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 235353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN; 235453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis name[i] = 0; 235553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2356f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 235753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Otherwise there is just one following character, which must be an ASCII 235853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisletter. */ 2359f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 236053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiselse if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0) 236153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 236253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis name[0] = c; 236353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis name[1] = 0; 236453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 236553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiselse goto ERROR_RETURN; 2366f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 236753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*ptrptr = ptr; 2368f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 236953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Search for a recognized property name using binary chop. 
*/ 2370f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 237153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbot = 0; 237253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskistop = PRIV(utt_size); 2373f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 237453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhile (bot < top) 237553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 237653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int r; 237753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis i = (bot + top) >> 1; 237853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis r = PRIV(strcmp_c8)(name, PRIV(utt_names) + PRIV(utt)[i].name_offset); 237953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (r == 0) 2380f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 238153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *ptypeptr = PRIV(utt)[i].type; 238253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *pdataptr = PRIV(utt)[i].value; 238353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return TRUE; 2384f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 238553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (r > 0) bot = i + 1; else top = i; 2386f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 238753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*errorcodeptr = ERR47; /* Unrecognized name */ 238853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn FALSE; 2389f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 239053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisERROR_RETURN: /* Malformed \P or \p */ 239153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*errorcodeptr = ERR46; 239253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*ptrptr = ptr; 2393f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichreturn FALSE; 2394f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 
239553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 2396f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2397f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2398f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2399f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 240053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Read repeat counts * 2401f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 2402f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 240353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Read an item of the form {n,m} and return the values. This is called only 240453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisafter is_counted_repeat() has confirmed that a repeat-count quantifier exists, 240553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisso the syntax is guaranteed to be correct, but we need to check the values. 
2406f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2407f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments: 240853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis p pointer to first char after '{' 240953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis minp pointer to int for min 241053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis maxp pointer to int for max 241153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis returned as -1 if no max 241253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcodeptr points to error code variable 241353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 241453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: pointer to '}' on success; 241553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis current ptr on error, with errorcodeptr set non-zero 2416f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 2417f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 241853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic PCRE2_SPTR 241953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisread_repeat_counts(PCRE2_SPTR p, int *minp, int *maxp, int *errorcodeptr) 2420f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 242153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint min = 0; 242253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint max = -1; 2423f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 242453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhile (IS_DIGIT(*p)) 2425f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 242653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis min = min * 10 + (int)(*p++ - CHAR_0); 242753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (min > 65535) 2428f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 242953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR5; 243053e448c1797913dbc12dbacc341559934566d6bcJanis 
Danisevskis return p; 2431f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 2432f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 2433f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 243453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else 2435f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 243653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*(++p) != CHAR_RIGHT_CURLY_BRACKET) 2437f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 243853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis max = 0; 243953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while(IS_DIGIT(*p)) 244053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 244153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis max = max * 10 + (int)(*p++ - CHAR_0); 244253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (max > 65535) 244353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 244453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR5; 244553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return p; 244653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 244753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 244853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (max < min) 244953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 245053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR4; 245153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return p; 245253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2453f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 2454f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 245553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 245653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*minp = min; 245753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*maxp = max; 
245853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn p; 2459f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 2460f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2461f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2462f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2463f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 246453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Scan compiled regex for recursion reference * 2465f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 2466f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 246753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function scans through a compiled pattern until it finds an instance of 246853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisOP_RECURSE. 2469f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2470f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments: 247153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code points to start of expression 247253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis utf TRUE in UTF mode 2473f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 247453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: pointer to the opcode for OP_RECURSE, or NULL if not found 2475f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 2476f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 247753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic PCRE2_SPTR 247853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfind_recurse(PCRE2_SPTR code, BOOL utf) 2479f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 248053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (;;) 2481f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 248253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis register PCRE2_UCHAR c = *code; 
248353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == OP_END) return NULL; 248453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == OP_RECURSE) return code; 2485f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 248653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* XCLASS is used for classes that cannot be represented just by a bit map. 248753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis This includes negated single high-valued characters. CALLOUT_STR is used for 248853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis callouts with string arguments. In both cases the length in the table is 248953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zero; the actual length is stored in the compiled code. */ 2490f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 249153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == OP_XCLASS) code += GET(code, 1); 249253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE); 2493f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 249453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Otherwise, we can get the item's length from the table, except that for 249553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis repeated character types, we have to test for \p and \P, which have an extra 249653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we 249753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis must add in its length. 
*/ 2498f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 249953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 2500f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 250153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis switch(c) 250253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 250353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPESTAR: 250453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEMINSTAR: 250553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEPLUS: 250653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEMINPLUS: 250753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEQUERY: 250853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEMINQUERY: 250953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEPOSSTAR: 251053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEPOSPLUS: 251153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEPOSQUERY: 251253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; 251353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 2514f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 251553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEPOSUPTO: 251653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEUPTO: 251753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEMINUPTO: 251853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TYPEEXACT: 251953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) 252053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code += 2; 252153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 2522f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 
252353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MARK: 252453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_PRUNE_ARG: 252553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_SKIP_ARG: 252653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_THEN_ARG: 252753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code += code[1]; 252853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 2529f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 2530f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 253153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Add in the fixed length from the table */ 2532f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2533f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += PRIV(OP_lengths)[c]; 2534f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 253553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may 253653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis be followed by a multi-unit character. The length in the table is a 253753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis minimum, so we have to arrange to skip the extra units. 
*/ 253853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 253953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef MAYBE_UTF_MULTI 254053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf) switch(c) 2541f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 254253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CHAR: 254353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_CHARI: 254453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOT: 254553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTI: 254653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_EXACT: 254753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_EXACTI: 254853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTEXACT: 254953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTEXACTI: 255053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_UPTO: 255153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_UPTOI: 255253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTUPTO: 255353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTUPTOI: 255453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINUPTO: 255553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINUPTOI: 255653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINUPTO: 255753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINUPTOI: 255853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSUPTO: 255953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSUPTOI: 256053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSUPTO: 256153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSUPTOI: 256253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_STAR: 256353e448c1797913dbc12dbacc341559934566d6bcJanis 
Danisevskis case OP_STARI: 256453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTSTAR: 256553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTSTARI: 256653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINSTAR: 256753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINSTARI: 256853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINSTAR: 256953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINSTARI: 257053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSSTAR: 257153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSSTARI: 257253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSSTAR: 257353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSSTARI: 257453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_PLUS: 257553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_PLUSI: 257653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPLUS: 257753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPLUSI: 257853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINPLUS: 257953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINPLUSI: 258053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINPLUS: 258153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINPLUSI: 258253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSPLUS: 258353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSPLUSI: 258453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSPLUS: 258553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSPLUSI: 258653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_QUERY: 258753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_QUERYI: 258853e448c1797913dbc12dbacc341559934566d6bcJanis 
Danisevskis case OP_NOTQUERY: 258953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTQUERYI: 259053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINQUERY: 259153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_MINQUERYI: 259253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINQUERY: 259353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTMINQUERYI: 259453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSQUERY: 259553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_POSQUERYI: 259653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSQUERY: 259753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_NOTPOSQUERYI: 259853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); 259953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 2600f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 260153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else 260253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (void)(utf); /* Keep compiler happy by referencing function argument */ 260353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* MAYBE_UTF_MULTI */ 260453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 260553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 260653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis} 2607f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2608f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2609f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 261053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/************************************************* 261153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Check for POSIX class syntax * 261253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/ 
261353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 261453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called when the sequence "[:" or "[." or "[=" is 261553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisencountered in a character class. It checks whether this is followed by a 261653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissequence of characters terminated by a matching ":]" or ".]" or "=]". If we 261753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreach an unescaped ']' without the special preceding character, return FALSE. 2618f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 261953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisOriginally, this function only recognized a sequence of letters between the 262053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisterminators, but it seems that Perl recognizes any sequence of characters, 262153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthough of course unknown POSIX names are subsequently rejected. Perl gives an 262253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE 262353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisdidn't consider this to be a POSIX class. Likewise for [:1234:]. 2624f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 262553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisThe problem in trying to be exactly like Perl is in the handling of escapes. We 262653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskishave to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX 262753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisclass, but [abc[:x\]pqr:]] is (so that an error can be generated). 
The code 262853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbelow handles the special cases \\ and \], but does not try to do any other 262953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisescape processing. This makes it different from Perl for cases such as 263053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis[:l\ower:] where Perl recognizes it as the POSIX class "lower" but PCRE does 263153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnot recognize "l\ower". This is a lesser evil than not diagnosing bad classes 263253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhen Perl does, I think. 2633f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 263453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisA user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not. 263553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisIt seems that the appearance of a nested POSIX class supersedes an apparent 263653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisexternal class. For example, [:a[:digit:]b:] matches "a", "b", ":", or 263753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisa digit. This is handled by returning FALSE if the start of a new group with 263853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe same terminator is encountered, since the next closing sequence must close 263953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe nested group, not the outer one. 2640f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 264153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisIn Perl, unescaped square brackets may also appear as part of class names. For 264253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisexample, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". 
However, for 264353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis[:a[:abc]b][b:] it gives unknown POSIX class "[:abc]b][b:]", which does not 264453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisseem right at all. PCRE does not allow closing square brackets in POSIX class 264553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnames. 2646f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 264753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments: 264853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr pointer to the initial [ 264953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis endptr where to return a pointer to the terminating ':', '.', or '=' 2650f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 265153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: TRUE or FALSE 265253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/ 2653f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 265453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic BOOL 265553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskischeck_posix_syntax(PCRE2_SPTR ptr, PCRE2_SPTR *endptr) 265653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{ 265753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR terminator; /* Don't combine these lines; the Solaris cc */ 265853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisterminator = *(++ptr); /* compiler warns about "non-constant" initializer. 
*/ 2659f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 266053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (++ptr; *ptr != CHAR_NULL; ptr++) 266153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 266253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr == CHAR_BACKSLASH && 266353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET || ptr[1] == CHAR_BACKSLASH)) 266453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 266553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if ((*ptr == CHAR_LEFT_SQUARE_BRACKET && ptr[1] == terminator) || 266653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE; 266753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) 2668f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 266953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *endptr = ptr; 267053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return TRUE; 2671f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 267253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2673f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 267453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn FALSE; 267553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis} 2676f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2677f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2678f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 267953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/************************************************* 268053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Check POSIX class name * 268153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/ 2682f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 
268353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called to check the name given in a POSIX-style class entry 268453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissuch as [:alnum:]. 2685f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 268653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments: 268753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr points to the first letter 268853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis len the length of the name 2689f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 269053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: a value representing the name, or -1 if unknown 269153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/ 2692f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 269353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic int 269453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskischeck_posix_name(PCRE2_SPTR ptr, int len) 269553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{ 269653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisconst char *pn = posix_names; 269753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister int yield = 0; 269853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhile (posix_name_lengths[yield] != 0) 269953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 270053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (len == posix_name_lengths[yield] && 270153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PRIV(strncmp_c8)(ptr, pn, (unsigned int)len) == 0) return yield; 270253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis pn += posix_name_lengths[yield] + 1; 270353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis yield++; 270453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 270553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn -1; 270653e448c1797913dbc12dbacc341559934566d6bcJanis 
Danisevskis} 2707f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2708f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2709f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 271053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 271153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/************************************************* 271253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Get othercase range * 271353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/ 2714f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 271553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is passed the start and end of a class range in UCT mode. It 271653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissearches up the characters, looking for ranges of characters in the "other" 271753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscase. Each call returns the next one, updating the start address. A character 271853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswith multiple other cases is returned on its own with a special return value. 
2719f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 272053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments: 272153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cptr points to starting character value; updated 272253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis d end value 272353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ocptr where to put start of othercase range 272453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis odptr where to put end of othercase range 2725f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 272653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisYield: -1 when no more 272753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0 when a range is returned 272853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis >0 the CASESET offset for char with multiple other cases 272953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis in this case, ocptr contains the original 273053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/ 2731f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 273253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic int 273353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisget_othercase_range(uint32_t *cptr, uint32_t d, uint32_t *ocptr, 273453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t *odptr) 273553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{ 273653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t c, othercase, next; 273753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisunsigned int co; 2738f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 273953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Find the first character that has an other case. If it has multiple other 274053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscases, return its case offset value. 
*/ 2741f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 274253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (c = *cptr; c <= d; c++) 274353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 274453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((co = UCD_CASESET(c)) != 0) 274553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 274653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *ocptr = c++; /* Character that has the set */ 274753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *cptr = c; /* Rest of input range */ 274853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return (int)co; 274953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 275053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((othercase = UCD_OTHERCASE(c)) != c) break; 275153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2752f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 275353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (c > d) return -1; /* Reached end of range */ 2754f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 275553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Found a character that has a single other case. Search for the end of the 275653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisrange, which is either the end of the input range, or a character that has zero 275753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisor more than one other cases. 
*/ 2758f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 275953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*ocptr = othercase; 276053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnext = othercase + 1; 2761f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 276253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (++c; c <= d; c++) 276353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 276453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break; 276553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis next++; 276653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2767f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 276853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*odptr = next - 1; /* End of othercase range */ 276953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*cptr = c; /* Rest of input range */ 277053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn 0; 277153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis} 277253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* SUPPORT_UNICODE */ 2773f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2774f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2775f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 277653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/************************************************* 277753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Add a character or range to a class * 277853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/ 2779f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 278053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function packages up the logic of adding a character or range of 278153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskischaracters to a class. 
The character values in the arguments will be within the 278253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisvalid values for the current mode (8-bit, 16-bit, UTF, etc). This function is 278353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismutually recursive with the function immediately below. 2784f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 278553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments: 278653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis classbits the bit map for characters < 256 278753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uchardptr points to the pointer for extra data 278853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis options the options word 278953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb compile data 279053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis start start of range character 279153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis end end of range character 2792f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 279353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: the number of < 256 characters added 279453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the pointer to extra data is updated 279553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/ 2796f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 27978b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisstatic unsigned int 279853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisadd_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, 279953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis compile_block *cb, uint32_t start, uint32_t end) 280053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{ 280153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t c; 280253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t classbits_end = (end <= 0xff ? 
end : 0xff); 28038b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisunsigned int n8 = 0; 2804f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 280553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* If caseless matching is required, scan the range and process alternate 280653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscases. In Unicode, there are 8-bit characters that have alternate cases that 280753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisare greater than 255 and vice-versa. Sometimes we can just extend the original 280853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisrange. */ 2809f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 281053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((options & PCRE2_CASELESS) != 0) 281153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 281253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 281353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_UTF) != 0) 281453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 281553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int rc; 281653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t oc, od; 2817f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 281853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis options &= ~PCRE2_CASELESS; /* Remove for recursive calls */ 281953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = start; 2820f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 282153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0) 282253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 282353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Handle a single character that has more than one other case. 
*/ 2824f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 282553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cb, 282653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PRIV(ucd_caseless_sets) + rc, oc); 2827f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 282853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Do nothing if the other case range is within the original range. */ 2829f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 283053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (oc >= start && od <= end) continue; 2831f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 283253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Extend the original range if there is overlap, noting that if oc < c, we 283353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis can't have od > end because a subrange is always shorter than the basic 283453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis range. Otherwise, use a recursive call to add the additional range. */ 2835f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 283653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */ 283753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (od > end && oc <= end + 1) 2838f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 283953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis end = od; /* Extend upwards */ 284053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (end > classbits_end) classbits_end = (end <= 0xff ? 
end : 0xff); 2841f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 284253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else n8 += add_to_class(classbits, uchardptr, options, cb, oc, od); 284353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 284453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 284553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 284653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* SUPPORT_UNICODE */ 2847f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 284853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Not UTF mode */ 2849f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 285053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (c = start; c <= classbits_end; c++) 285153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 285253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis SETBIT(classbits, cb->fcc[c]); 285353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis n8++; 285453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 285553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2856f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 285753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Now handle the original range. Adjust the final value according to the bit 285853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskislength - this means that the same lists of (e.g.) horizontal spaces can be used 285953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisin all cases. */ 2860f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 286153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((options & PCRE2_UTF) == 0 && end > MAX_NON_UTF_CHAR) 286253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis end = MAX_NON_UTF_CHAR; 2863f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 286453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Use the bitmap for characters < 256. 
Otherwise use extra data.*/ 2865f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 286653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (c = start; c <= classbits_end; c++) 286753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 286853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Regardless of start, c will always be <= 255. */ 286953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis SETBIT(classbits, c); 287053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis n8++; 287153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 2872f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 287353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS 287453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (start <= 0xff) start = 0xff + 1; 2875f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 287653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (end >= start) 287753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 287853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *uchardata = *uchardptr; 2879f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 288053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 288153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_UTF) != 0) 288253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 288353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (start < end) 288453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 288553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *uchardata++ = XCL_RANGE; 288653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uchardata += PRIV(ord2utf)(start, uchardata); 288753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uchardata += PRIV(ord2utf)(end, uchardata); 288853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 288953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 
else if (start == end) 289053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 289153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *uchardata++ = XCL_SINGLE; 289253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uchardata += PRIV(ord2utf)(start, uchardata); 2893f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 2894f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 289553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 289653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* SUPPORT_UNICODE */ 2897f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 289853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Without UTF support, character values are constrained by the bit length, 289953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis and can only be > 256 for 16-bit and 32-bit libraries. */ 2900f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 290153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 8 290253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis {} 290353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else 290453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (start < end) 290553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 290653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *uchardata++ = XCL_RANGE; 290753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *uchardata++ = start; 290853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *uchardata++ = end; 290953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 291053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (start == end) 291153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 291253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *uchardata++ = XCL_SINGLE; 291353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *uchardata++ = start; 291453e448c1797913dbc12dbacc341559934566d6bcJanis 
Danisevskis } 291553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 291653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *uchardptr = uchardata; /* Updata extra data pointer */ 2917f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 291853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else 291953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (void)uchardptr; /* Avoid compiler warning */ 292053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* SUPPORT_WIDE_CHARS */ 2921f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 292253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn n8; /* Number of 8-bit characters */ 2923f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 2924f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2925f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2926f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2927f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 292853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Add a list of characters to a class * 2929f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 2930f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 293153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is used for adding a list of case-equivalent characters to a 293253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisclass, and also for adding a list of horizontal or vertical whitespace. If the 293353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskislist is in order (which it should be), ranges of characters are detected and 293453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskishandled appropriately. This function is mutually recursive with the function 293553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisabove. 
2936f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2937f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments: 293853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis classbits the bit map for characters < 256 293953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uchardptr points to the pointer for extra data 294053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis options the options word 294153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb contains pointers to tables etc. 294253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis p points to row of 32-bit values, terminated by NOTACHAR 294353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis except character to omit; this is used when adding lists of 294453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case-equivalent characters to avoid including the one we 294553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis already know about 2946f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 294753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: the number of < 256 characters added 294853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the pointer to extra data is updated 2949f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 2950f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 29518b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisstatic unsigned int 295253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisadd_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options, 295353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis compile_block *cb, const uint32_t *p, unsigned int except) 2954f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 29558b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisunsigned int n8 = 0; 295653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhile (p[0] < NOTACHAR) 2957f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 
29588b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis unsigned int n = 0; 295953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (p[0] != except) 296053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 296153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while(p[n+1] == p[0] + n + 1) n++; 296253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis n8 += add_to_class(classbits, uchardptr, options, cb, p[0], p[n]); 296353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 296453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis p += n + 1; 296553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 296653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn n8; 296753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis} 29688366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes 29698366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes 2970f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 297153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/************************************************* 297253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Add characters not in a list to a class * 297353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/ 2974f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 297553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is used for adding the complement of a list of horizontal or 297653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisvertical whitespace to a class. The list must be in order. 
2977f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 297853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments: 297953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis classbits the bit map for characters < 256 298053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uchardptr points to the pointer for extra data 298153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis options the options word 298253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb contains pointers to tables etc. 298353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis p points to row of 32-bit values, terminated by NOTACHAR 2984f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 298553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: the number of < 256 characters added 298653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the pointer to extra data is updated 298753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/ 2988f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 29898b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisstatic unsigned int 299053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisadd_not_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, 299153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t options, compile_block *cb, const uint32_t *p) 299253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{ 299353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL utf = (options & PCRE2_UTF) != 0; 29948b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisunsigned int n8 = 0; 299553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (p[0] > 0) 299653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis n8 += add_to_class(classbits, uchardptr, options, cb, 0, p[0] - 1); 299753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhile (p[0] < NOTACHAR) 299853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 
299953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (p[1] == p[0] + 1) p++; 300053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis n8 += add_to_class(classbits, uchardptr, options, cb, p[0] + 1, 300153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1); 300253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis p++; 300353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 300453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn n8; 300553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis} 3006f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3007f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3008f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 300953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/************************************************* 301053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Process (*VERB) name for escapes * 301153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/ 3012f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 301353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called when the PCRE2_ALT_VERBNAMES option is set, to 301453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisprocess the characters in a verb's name argument. It is called twice, once with 301553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscodeptr == NULL, to find out the length of the processed name, and again to put 301653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe name into memory. 
3017f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 301853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments: 301953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptrptr pointer to the input pointer 302053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis codeptr pointer to the compiled code pointer 302153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcodeptr pointer to the error code 302253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis options the options bits 302353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis utf TRUE if processing UTF 302453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb compile data block 302553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 302653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: length of the processed name, or < 0 on error 302753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/ 3028f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 302953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic int 303053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisprocess_verb_name(PCRE2_SPTR *ptrptr, PCRE2_UCHAR **codeptr, int *errorcodeptr, 303153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t options, BOOL utf, compile_block *cb) 303253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{ 303353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint32_t arglen = 0; 303453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL inescq = FALSE; 303553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR ptr = *ptrptr; 303653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *code = (codeptr == NULL)? 
NULL : *codeptr; 3037f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 303853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (; ptr < cb->end_pattern; ptr++) 303953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 304053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t x = *ptr; 3041f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 304253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Skip over literals */ 3043f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 304453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (inescq) 304553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 304653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (x == CHAR_BACKSLASH && ptr[1] == CHAR_E) 304753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 304853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis inescq = FALSE; 304953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++;; 305053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis continue; 3051f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 3052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 3053f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 305453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else /* Not a literal character */ 3055f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 305653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (x == CHAR_RIGHT_PARENTHESIS) break; 3057f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 305853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Skip over comments and whitespace in extended mode. 
*/ 3059f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 306053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_EXTENDED) != 0) 306153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 306253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR wscptr = ptr; 306353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (MAX_255(x) && (cb->ctypes[x] & ctype_space) != 0) x = *(++ptr); 306453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (x == CHAR_NUMBER_SIGN) 306553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 306653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 306753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (*ptr != CHAR_NULL || ptr < cb->end_pattern) 306853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 306953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */ 307053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { /* IS_NEWLINE sets cb->nllen. */ 307153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += cb->nllen; 307253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 307353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 307453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 307553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 307653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf) FORWARDCHAR(ptr); 3077f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 307853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 307953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3080f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 308153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If we have skipped any characters, restart the loop. 
*/ 3082f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 308353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr > wscptr) 308453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 308553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr--; 308653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis continue; 308753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 308853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3089f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 309053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Process escapes */ 3091f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 309253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (x == '\\') 309353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 309453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int rc; 309553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = 0; 309653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis rc = PRIV(check_escape)(&ptr, cb->end_pattern, &x, errorcodeptr, options, 309753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis FALSE, cb); 309853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *ptrptr = ptr; /* For possible error */ 309953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*errorcodeptr != 0) return -1; 310053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (rc != 0) 310153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 310253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (rc == ESC_Q) 310353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 310453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis inescq = TRUE; 310553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis continue; 310653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 310753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (rc == ESC_E) continue; 
310853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR40; 310953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return -1; 311053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 311153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 311253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3113f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 311453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* We have the next character in the name. */ 3115f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 311653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 311753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf) 311853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 311953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (code == NULL) /* Just want the length */ 312053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 312153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 8 312253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int i; 312353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 0; i < PRIV(utf8_table1_size); i++) 312453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((int)x <= PRIV(utf8_table1)[i]) break; 312553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis arglen += i; 312653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#elif PCRE2_CODE_UNIT_WIDTH == 16 312753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (x > 0xffff) arglen++; 312853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 312953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 313053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 313153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 313253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR cbuff[8]; 313353e448c1797913dbc12dbacc341559934566d6bcJanis 
Danisevskis x = PRIV(ord2utf)(x, cbuff); 313453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memcpy(code, cbuff, CU2BYTES(x)); 313553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code += x; 313653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 313753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 313853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 313953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* SUPPORT_UNICODE */ 3140f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 314153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Not UTF */ 314253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 31438b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis if (code != NULL) *code++ = (PCRE2_UCHAR)x; 314453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3145f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 314653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis arglen++; 314753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 314853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((unsigned int)arglen > MAX_MARK) 3149f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 315053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR76; 315153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *ptrptr = ptr; 315253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return -1; 3153f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 3154f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 3155f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 315653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Update the pointers before returning. 
*/ 315753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 315853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*ptrptr = ptr; 315953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (codeptr != NULL) *codeptr = code; 316053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn arglen; 316153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis} 3162f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3163f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3164f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3165f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 316653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Macro for the next two functions * 3167f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 3168f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 316953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Both scan_for_captures() and compile_branch() use this macro to generate a 317053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfragment of code that reads the characters of a name and sets its length 317153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis(checking for not being too long). Count the characters dynamically, to avoid 317253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe possibility of integer overflow. The same macro is used for reading *VERB 317353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnames. 
*/ 317453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 317553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define READ_NAME(ctype, errno, errset) \ 317653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis namelen = 0; \ 317753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype) != 0) \ 317853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { \ 317953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; \ 318053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis namelen++; \ 318153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (namelen > MAX_NAME_SIZE) \ 318253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { \ 318353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errset = errno; \ 318453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; \ 318553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } \ 318653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3187f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3188f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3189f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3190f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 319153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Scan regex to identify named groups * 3192f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 3193f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 319453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called first of all, to scan for named capturing groups so 319553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthat information about them is fully available to both the compiling scans. 319653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisIt skips over everything except parenthesized items. 
3197f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3198f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments: 319953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptrptr points to pointer to the start of the pattern 320053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis options compiling dynamic options 320153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb pointer to the compile data block 3202f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 320353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: zero on success or a non-zero error code, with pointer updated 3204f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 3205f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 320653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskistypedef struct nest_save { 320753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint16_t nest_depth; 320853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint16_t reset_group; 320953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint16_t max_group; 321053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint16_t flags; 321153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis} nest_save; 321253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 321353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define NSF_RESET 0x0001u 321453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define NSF_EXTENDED 0x0002u 321553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define NSF_DUPNAMES 0x0004u 3216f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 32178b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisstatic int scan_for_captures(PCRE2_SPTR *ptrptr, uint32_t options, 321853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis compile_block *cb) 321953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{ 322053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t c; 
322153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t delimiter; 322253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t set, unset, *optset; 32238b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisuint32_t skiptoket = 0; 32248b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisuint16_t nest_depth = 0; 322553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint errorcode = 0; 322653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint escape; 322753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint namelen; 322853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint i; 322953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL inescq = FALSE; 323053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL isdupname; 323153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL utf = (options & PCRE2_UTF) != 0; 323253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL negate_class; 323353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR name; 323453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR start; 323553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR ptr = *ptrptr; 323653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnamed_group *ng; 323753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnest_save *top_nest = NULL; 323853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnest_save *end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size); 323953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 324053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* The size of the nest_save structure might not be a factor of the size of the 324153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisworkspace. Therefore we must round down end_nests so as to correctly avoid 324253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscreating a nest_save that spans the end of the workspace. 
*/ 324353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 324453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisend_nests = (nest_save *)((char *)end_nests - 324553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ((cb->workspace_size * sizeof(PCRE2_UCHAR)) % sizeof(nest_save))); 324653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 324753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Now scan the pattern */ 324853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 324953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (; ptr < cb->end_pattern; ptr++) 3250f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 325153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = *ptr; 325253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 325353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Parenthesized groups set skiptoket when all following characters up to the 325453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis next closing parenthesis must be ignored. The parenthesis itself must be 325553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis processed (to end the nested parenthesized item). 
*/ 325653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 32578b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis if (skiptoket != 0) 3258f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 325953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c != CHAR_RIGHT_PARENTHESIS) continue; 32608b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis skiptoket = 0; 3261f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 3262f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 326353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Skip over literals */ 3264f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 326553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (inescq) 3266f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 326753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) 326853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 326953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis inescq = FALSE; 327053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 327153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 327253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis continue; 3273f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 3274f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 32758b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis /* Skip over # comments and whitespace in extended mode. 
*/ 327653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 327753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_EXTENDED) != 0) 327853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 32798b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis PCRE2_SPTR wscptr = ptr; 32808b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis while (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) c = *(++ptr); 32818b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis if (c == CHAR_NUMBER_SIGN) 328253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 328353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 32848b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis while (ptr < cb->end_pattern) 328553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 328653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */ 328753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { /* IS_NEWLINE sets cb->nllen. */ 328853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += cb->nllen; 328953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 329053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 329153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 329253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 329353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf) FORWARDCHAR(ptr); 329453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 329553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 32968b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis } 32978b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 32988b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis /* If we skipped any characters, restart the loop. Otherwise, we didn't see 32998b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis a comment. 
*/ 33008b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 33018b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis if (ptr > wscptr) 33028b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis { 33038b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis ptr--; 33048b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis continue; 330553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 330653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 33070ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 330853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Process the next pattern item. */ 33090ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 331053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis switch(c) 331153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 331253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis default: /* Most characters are just skipped */ 331353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 3314f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 331553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Skip escapes except for \Q */ 3316f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 331753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_BACKSLASH: 331853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = 0; 331953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis escape = PRIV(check_escape)(&ptr, cb->end_pattern, &c, &errorcode, options, 332053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis FALSE, cb); 332153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (errorcode != 0) goto FAILED; 332253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (escape == ESC_Q) inescq = TRUE; 332353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 3324f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 332553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Skip a character 
class. The syntax is complicated so we have to 332653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis replicate some of what happens when a class is processed for real. */ 3327f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 332853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_LEFT_SQUARE_BRACKET: 332953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0 || 333053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0) 333153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 333253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 6; 333353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 333453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3335f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 333653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If the first character is '^', set the negation flag (not actually used 333753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis here, except to recognize only one ^) and skip it. If the first few 333853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis characters (either before or after ^) are \Q\E or \E we skip them too. This 333953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis makes for compatibility with Perl. 
*/ 3340f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 334153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis negate_class = FALSE; 334253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (;;) 334353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 334453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = *(++ptr); /* First character in class */ 334553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_BACKSLASH) 334653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 334753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[1] == CHAR_E) 334853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 334953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (PRIV(strncmp_c8)(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0) 335053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 3; 335153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 335253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 335353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 335453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) 335553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis negate_class = TRUE; 335653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else break; 335753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3358f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 335953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_RIGHT_SQUARE_BRACKET && 336053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0) 336153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 3362f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 336353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Loop for the contents of the class */ 3364f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich 336553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (;;) 336653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 336753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR tempptr; 3368f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 336953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_NULL && ptr >= cb->end_pattern) 337053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 337153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR6; /* Missing terminating ']' */ 337253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 337353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3374f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 337553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 337653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf && HAS_EXTRALEN(c)) 337753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { /* Braces are required because the */ 337853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ 337953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 338053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 3381f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 338253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Inside \Q...\E everything is literal except \E */ 3383f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 338453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (inescq) 338553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 338653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) /* If we are at \E */ 338753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 338853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis inescq = FALSE; /* Reset literal state */ 
338953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; /* Skip the 'E' */ 339053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 339153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto CONTINUE_CLASS; 339253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3393f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 339453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Skip POSIX class names. */ 339553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_LEFT_SQUARE_BRACKET && 339653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || 339753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr)) 339853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 339953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr = tempptr + 1; 340053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 340153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (c == CHAR_BACKSLASH) 340253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 340353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = 0; 340453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis escape = PRIV(check_escape)(&ptr, cb->end_pattern, &c, &errorcode, 340553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis options, TRUE, cb); 340653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (errorcode != 0) goto FAILED; 340753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (escape == ESC_Q) inescq = TRUE; 340853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 340953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 341053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis CONTINUE_CLASS: 341153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = *(++ptr); 341253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == 
CHAR_RIGHT_SQUARE_BRACKET && !inescq) break; 341353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } /* End of class-processing loop */ 341453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 3415f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 341653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* This is the real work of this function - handling parentheses. */ 3417f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 341853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_LEFT_PARENTHESIS: 341953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis nest_depth++; 3420f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 342153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[1] != CHAR_QUESTION_MARK) 342253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 342353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[1] != CHAR_ASTERISK) 342453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 342553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_NO_AUTO_CAPTURE) == 0) cb->bracount++; 342653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3427f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 34288b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis /* (*something) - skip over a name, and then just skip to closing ket 34298b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis unless PCRE2_ALT_VERBNAMES is set, in which case we have to process 34308b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis escapes in the string after a verb name terminated by a colon. 
*/ 3431f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 343253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 343353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 343453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 2; 343553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) ptr++; 34368b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis if (*ptr == CHAR_COLON && (options & PCRE2_ALT_VERBNAMES) != 0) 343753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 343853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 34398b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis if (process_verb_name(&ptr, NULL, &errorcode, options, utf, cb) < 0) 34408b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis goto FAILED; 34418b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis } 34428b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis else 34438b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis { 34448b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) 34458b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis ptr++; 344653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 344753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis nest_depth--; 344853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 344953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3450f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 345153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Handle (?...) 
groups */ 3452f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 345353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else switch(ptr[2]) 345453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 345553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis default: 345653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 2; 345753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[0] == CHAR_R || /* (?R) */ 345853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr[0] == CHAR_NUMBER_SIGN || /* (?#) */ 345953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis IS_DIGIT(ptr[0]) || /* (?n) */ 346053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (ptr[0] == CHAR_MINUS && IS_DIGIT(ptr[1]))) /* (?-n) */ 346153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 34628b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis skiptoket = ptr[0]; 346353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 346453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3465f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 346653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Handle (?| and (?imsxJU: which are the only other valid forms. Both 346753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis need a new block on the nest stack. 
*/ 3468f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 346953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (top_nest == NULL) top_nest = (nest_save *)(cb->start_workspace); 347053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (++top_nest >= end_nests) 347153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 347253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR84; 347353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 347453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 347553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis top_nest->nest_depth = nest_depth; 347653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis top_nest->flags = 0; 347753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_EXTENDED) != 0) top_nest->flags |= NSF_EXTENDED; 347853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_DUPNAMES) != 0) top_nest->flags |= NSF_DUPNAMES; 3479f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 348053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr == CHAR_VERTICAL_LINE) 348153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 34828b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis top_nest->reset_group = (uint16_t)cb->bracount; 34838b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis top_nest->max_group = (uint16_t)cb->bracount; 348453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis top_nest->flags |= NSF_RESET; 348553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->external_flags |= PCRE2_DUPCAPUSED; 348653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 348753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3488f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 348953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Scan options */ 3490f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 
349153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis top_nest->reset_group = 0; 349253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis top_nest->max_group = 0; 3493f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 349453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis set = unset = 0; 349553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis optset = &set; 3496f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 349753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Need only track (?x: and (?J: at this stage */ 3498f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 349953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON) 350053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 350153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis switch (*ptr++) 350253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 350353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_MINUS: optset = &unset; break; 3504f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 350553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_x: *optset |= PCRE2_EXTENDED; break; 3506f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 350753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_J: 350853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *optset |= PCRE2_DUPNAMES; 350953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->external_flags |= PCRE2_JCHANGED; 351053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 3511f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 351253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_i: 351353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_m: 351453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_s: 351553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_U: 
351653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 3517f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 35188b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis default: 35198b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis errorcode = ERR11; 35208b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis ptr--; /* Correct the offset */ 35218b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis goto FAILED; 352253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 352353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3524f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 352553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis options = (options | set) & (~unset); 3526f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 352753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If the options ended with ')' this is not the start of a nested 352853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis group with option changes, so the options change at this level. If the 352953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis previous level set up a nest block, discard the one we have just created. 353053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis Otherwise adjust it for the previous level. 
*/ 3531f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 353253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr == CHAR_RIGHT_PARENTHESIS) 353353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 353453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis nest_depth--; 353553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (top_nest > (nest_save *)(cb->start_workspace) && 353653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (top_nest-1)->nest_depth == nest_depth) top_nest --; 353753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else top_nest->nest_depth = nest_depth; 353853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 353953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 3540f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 354153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Skip over a numerical or string argument for a callout. */ 3542f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 354353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_C: 354453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 2; 354553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[1] == CHAR_RIGHT_PARENTHESIS) break; 354653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (IS_DIGIT(ptr[1])) 354753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 354853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (IS_DIGIT(ptr[1])) ptr++; 354953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3550f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 355153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Handle a string argument */ 3552f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 355353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 355453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 355553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 
355653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis delimiter = 0; 355753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) 355853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 355953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr == PRIV(callout_start_delims)[i]) 356053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 356153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis delimiter = PRIV(callout_end_delims)[i]; 356253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 356353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 356453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3565f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 356653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (delimiter == 0) 356753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 356853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR82; 356953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 357053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3571f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 357253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis start = ptr; 357353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis do 357453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 357553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (++ptr >= cb->end_pattern) 357653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 357753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR81; 357853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr = start; /* To give a more useful message */ 357953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 358053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 358153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[0] == delimiter && ptr[1] == 
delimiter) ptr += 2; 358253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 358353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (ptr[0] != delimiter); 358453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3585f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 358653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Check terminating ) */ 3587f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 358853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[1] != CHAR_RIGHT_PARENTHESIS) 35898366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes { 359053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR39; 359153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 359253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 35938366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes } 359453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 3595f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 359653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Conditional group */ 3597f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 359853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_LEFT_PARENTHESIS: 359953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[3] != CHAR_QUESTION_MARK) /* Not assertion or callout */ 360053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 360153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis nest_depth++; 360253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 2; 360353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 360453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3605f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 360653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Must be an assertion or a callout */ 3607f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 360853e448c1797913dbc12dbacc341559934566d6bcJanis 
Danisevskis switch(ptr[4]) 360953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 361053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_LESS_THAN_SIGN: 361153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[5] != CHAR_EXCLAMATION_MARK && ptr[5] != CHAR_EQUALS_SIGN) 361253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto MISSING_ASSERTION; 361353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Fall through */ 361453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 361553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_C: 361653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_EXCLAMATION_MARK: 361753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_EQUALS_SIGN: 361853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 361953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 3620f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 362153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis default: 362253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis MISSING_ASSERTION: 362353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 3; /* To improve error message */ 362453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR28; 362553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 362653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 362753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 3628f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 362953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_COLON: 363053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_GREATER_THAN_SIGN: 363153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_EQUALS_SIGN: 363253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_EXCLAMATION_MARK: 363353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case 
CHAR_AMPERSAND: 363453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_PLUS: 363553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 2; 363653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 3637f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 363853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_P: 363953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[3] != CHAR_LESS_THAN_SIGN) 364053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 364153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 3; 364253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 364353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 364453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 364553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = CHAR_GREATER_THAN_SIGN; /* Terminator */ 364653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto DEFINE_NAME; 3647f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 364853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_LESS_THAN_SIGN: 364953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[3] == CHAR_EQUALS_SIGN || ptr[3] == CHAR_EXCLAMATION_MARK) 365053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 365153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 3; 365253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 365353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 365453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = CHAR_GREATER_THAN_SIGN; /* Terminator */ 365553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto DEFINE_NAME; 3656f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 365753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_APOSTROPHE: 365853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = CHAR_APOSTROPHE; /* Terminator */ 
3659f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 366053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis DEFINE_NAME: 366153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis name = ptr = ptr + 3; 3662f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 366353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr == c) /* Empty name */ 366453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 366553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR62; 366653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 366753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3668f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 366953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (IS_DIGIT(*ptr)) 367053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 367153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR44; /* Group name must start with non-digit */ 367253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 367353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3674f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 367553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) == 0) 367653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 367753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR24; 367853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 367953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3680f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 368153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Advance ptr, set namelen and check its length. 
*/ 368253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis READ_NAME(ctype_word, ERR48, errorcode); 368353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 368453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr != c) 368553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 368653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR42; 368753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 368853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3689f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 369053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (cb->names_found >= MAX_NAME_COUNT) 369153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 369253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR49; 369353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 369453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3695f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 369653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (namelen + IMM2_SIZE + 1 > cb->name_entry_size) 36978b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis cb->name_entry_size = (uint16_t)(namelen + IMM2_SIZE + 1); 3698f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 369953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* We have a valid name for this capturing group. */ 3700f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 370153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->bracount++; 3702f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 370353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Scan the list to check for duplicates. 
For duplicate names, if the 370453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis number is the same, break the loop, which causes the name to be 370553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis discarded; otherwise, if DUPNAMES is not set, give an error. 370653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis If it is set, allow the name with a different number, but continue 370753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis scanning in case this is a duplicate with the same number. For 370853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis non-duplicate names, give an error if the number is duplicated. */ 3709f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 371053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis isdupname = FALSE; 371153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ng = cb->named_groups; 371253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 0; i < cb->names_found; i++, ng++) 371353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 371453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (namelen == ng->length && 37158b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis PRIV(strncmp)(name, ng->name, (size_t)namelen) == 0) 371653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 371753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ng->number == cb->bracount) break; 371853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_DUPNAMES) == 0) 371953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 372053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR43; 372153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 372253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 372353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis isdupname = ng->isdup = TRUE; /* Mark as a duplicate */ 372453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 
cb->dupnames = TRUE; /* Duplicate names exist */ 372553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 372653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (ng->number == cb->bracount) 372753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 372853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR65; 372953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 373053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 373153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3732f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 373353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (i < cb->names_found) break; /* Ignore duplicate with same number */ 3734f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 373553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Increase the list size if necessary */ 3736f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 373753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (cb->names_found >= cb->named_group_list_size) 373853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 37398b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis uint32_t newsize = cb->named_group_list_size * 2; 374053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis named_group *newspace = 374153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->cx->memctl.malloc(newsize * sizeof(named_group), 374253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->cx->memctl.memory_data); 374353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (newspace == NULL) 374453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 374553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR21; 374653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 374753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3748f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 
374953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memcpy(newspace, cb->named_groups, 375053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->named_group_list_size * sizeof(named_group)); 375153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (cb->named_group_list_size > NAMED_GROUP_LIST_SIZE) 375253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->cx->memctl.free((void *)cb->named_groups, 375353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->cx->memctl.memory_data); 375453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->named_groups = newspace; 375553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->named_group_list_size = newsize; 375653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 3757f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 375853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Add this name to the list */ 3759f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 376053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->named_groups[cb->names_found].name = name; 37618b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis cb->named_groups[cb->names_found].length = (uint16_t)namelen; 376253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->named_groups[cb->names_found].number = cb->bracount; 37638b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis cb->named_groups[cb->names_found].isdup = (uint16_t)isdupname; 376453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->names_found++; 376553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 376653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } /* End of (? switch */ 376753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; /* End of ( handling */ 3768f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 376953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* At an alternation, reset the capture count if we are in a (?| group. 
*/ 3770f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 377153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_VERTICAL_LINE: 377253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (top_nest != NULL && top_nest->nest_depth == nest_depth && 377353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (top_nest->flags & NSF_RESET) != 0) 377453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 377553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (cb->bracount > top_nest->max_group) 37768b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis top_nest->max_group = (uint16_t)cb->bracount; 377753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->bracount = top_nest->reset_group; 377853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 377953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 3780f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 378153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* At a right parenthesis, reset the capture count to the maximum if we 378253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis are in a (?| group and/or reset the extended option. 
*/ 378353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 378453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_RIGHT_PARENTHESIS: 378553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (top_nest != NULL && top_nest->nest_depth == nest_depth) 378653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 378753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((top_nest->flags & NSF_RESET) != 0 && 378853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis top_nest->max_group > cb->bracount) 378953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->bracount = top_nest->max_group; 379053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((top_nest->flags & NSF_EXTENDED) != 0) options |= PCRE2_EXTENDED; 379153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else options &= ~PCRE2_EXTENDED; 379253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((top_nest->flags & NSF_DUPNAMES) != 0) options |= PCRE2_DUPNAMES; 379353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else options &= ~PCRE2_DUPNAMES; 379453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL; 379553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else top_nest--; 379653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 37978b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis if (nest_depth == 0) /* Unmatched closing parenthesis */ 37988b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis { 37998b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis errorcode = ERR22; 38008b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis goto FAILED; 38018b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis } 38028b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis nest_depth--; 380353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 380453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 
3805f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 380653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 38078b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisif (nest_depth == 0) 38088b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis { 38098b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis cb->final_bracount = cb->bracount; 38108b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis return 0; 38118b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis } 38128b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 38138b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis/* We give a special error for a missing closing parentheses after (?# because 38148b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisit might otherwise be hard to see where the missing character is. */ 38158b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 38168b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskiserrorcode = (skiptoket == CHAR_NUMBER_SIGN)? 
ERR18 : ERR14; 381753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 381853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisFAILED: 381953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*ptrptr = ptr; 382053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn errorcode; 3821f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 3822f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3823f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3824f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3825f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 3826f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich* Compile one branch * 3827f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 3828f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3829f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Scan the pattern, compiling it into a vector. If the options are 3830f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichchanged during the branch, the pointer is used to change the external options 3831f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbits. This function is used during the pre-compile phase when we are trying 3832f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichto find out the amount of memory needed, as well as during the real compile 3833f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichphase. The value of lengthptr distinguishes the two phases.
3834f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3835f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments: 3836f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich optionsptr pointer to the option bits 3837f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich codeptr points to the pointer to the current code point 3838f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptrptr points to the current pattern pointer 3839f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich errorcodeptr points to error code variable 384053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcuptr place to put the first required code unit 384153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcuflagsptr place to put the first code unit flags, or a negative number 384253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuptr place to put the last required code unit 384353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuflagsptr place to put the last required code unit flags, or a negative number 3844f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bcptr points to current branch chain 3845f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cond_depth conditional nesting depth 384653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb contains pointers to tables etc. 
3847f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich lengthptr NULL during the real compile phase 3848f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich points to length accumulator during pre-compile phase 3849f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3850f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichReturns: TRUE on success 3851f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich FALSE, with *errorcodeptr set non-zero on error 3852f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 3853f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3854f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic BOOL 385553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, 385653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR *ptrptr, int *errorcodeptr, 385753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t *firstcuptr, int32_t *firstcuflagsptr, 385853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t *reqcuptr, int32_t *reqcuflagsptr, 3859f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich branch_chain *bcptr, int cond_depth, 386053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis compile_block *cb, size_t *lengthptr) 3861f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 3862f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichint repeat_min = 0, repeat_max = 0; /* To please picky compilers */ 3863f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichint bravalue = 0; 386453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t greedy_default, greedy_non_default; 386553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t repeat_type, op_type; 386653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t options = *optionsptr; /* May change dynamically */ 386753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t firstcu, reqcu; 
386853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint32_t firstcuflags, reqcuflags; 386953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t zeroreqcu, zerofirstcu; 387053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint32_t zeroreqcuflags, zerofirstcuflags; 387153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint32_t req_caseopt, reqvary, tempreqvary; 3872f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichint after_manual_callout = 0; 3873f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichint escape; 387453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissize_t length_prevgroup = 0; 387553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister uint32_t c; 387653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister PCRE2_UCHAR *code = *codeptr; 387753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *last_code = code; 387853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *orig_code = code; 387953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *tempcode; 3880f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichBOOL inescq = FALSE; 388153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL groupsetfirstcu = FALSE; 388253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR ptr = *ptrptr; 388353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR tempptr; 388453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *previous = NULL; 388553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *previous_callout = NULL; 388653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint8_t classbits[32]; 388753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 388853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* We can fish out the UTF setting once and for all into a BOOL, but we must 388953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnot do this for other options 
(e.g. PCRE2_EXTENDED) because they may change 3890f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdynamically as we process the pattern. */ 3891f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 389253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 389353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL utf = (options & PCRE2_UTF) != 0; 389453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH != 32 389553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR utf_units[6]; /* For setting up multi-cu chars */ 3896f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 389753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 389853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else /* No UTF support */ 3899f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichBOOL utf = FALSE; 3900f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 3901f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3902f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Helper variables for OP_XCLASS opcode (for characters > 255). We define 3903f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichclass_uchardata always so that it can be passed to add_to_class() always, 3904f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichthough it will not be used in non-UTF 8-bit cases. This avoids having to supply 3905f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichalternative calls for the different cases. 
*/ 3906f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 390753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *class_uchardata; 390853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS 3909f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichBOOL xclass; 391053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *class_uchardata_base; 3911f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 3912f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3913f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Set up the default and non-default settings for greediness */ 3914f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 391553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisgreedy_default = ((options & PCRE2_UNGREEDY) != 0); 3916f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichgreedy_non_default = greedy_default ^ 1; 3917f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 391853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Initialize no first unit, no required unit. REQ_UNSET means "no char 3919f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichmatching encountered yet". It gets changed to REQ_NONE if we hit something that 392053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismatches a non-fixed first unit; reqcu just remains unset if we never find one. 3921f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3922f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichWhen we hit a repeat whose minimum is zero, we may have to adjust these values 3923f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichto take the zero repeat into account. This is implemented by setting them to 392453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiszerofirstcu and zeroreqcu when such a repeat is encountered. The individual 3925f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichitem types that can be repeated set these backoff variables appropriately. 
*/ 3926f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 392753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfirstcu = reqcu = zerofirstcu = zeroreqcu = 0; 392853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfirstcuflags = reqcuflags = zerofirstcuflags = zeroreqcuflags = REQ_UNSET; 3929f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 393053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* The variable req_caseopt contains either the REQ_CASELESS value or zero, 393153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisaccording to the current setting of the caseless flag. The REQ_CASELESS value 393253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisleaves the lower 28 bit empty. It is added into the firstcu or reqcu variables 393353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisto record the case status of the value. This is used only for ASCII characters. 393453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/ 3935f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 393653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreq_caseopt = ((options & PCRE2_CASELESS) != 0)? 
REQ_CASELESS:0; 3937f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3938f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Switch on next character until the end of the branch */ 3939f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3940f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichfor (;; ptr++) 3941f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 3942f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich BOOL negate_class; 3943f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich BOOL should_flip_negation; 394453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis BOOL match_all_or_no_wide_chars; 3945f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich BOOL possessive_quantifier; 3946f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich BOOL is_quantifier; 3947f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich BOOL is_recurse; 394853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis BOOL is_dupname; 3949f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich BOOL reset_bracount; 3950f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int class_has_8bitchar; 3951f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int class_one_char; 395253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS 3953f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich BOOL xclass_has_prop; 3954f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 395553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int recno; /* Must be signed */ 395653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int refsign; /* Must be signed */ 395753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int terminator; /* Must be signed */ 3958f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich unsigned int mclength; 3959f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich unsigned int tempbracount; 396053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t ec; 
396153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t newoptions; 396253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t skipunits; 396353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t subreqcu, subfirstcu; 396453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int32_t subreqcuflags, subfirstcuflags; /* Must be signed */ 396553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR mcbuffer[8]; 3966f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 39678b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis /* Come here to restart the loop. */ 39688b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 39698b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis REDO_LOOP: 39708b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 3971f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Get next character in the pattern */ 3972f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3973f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich c = *ptr; 3974f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3975f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If we are at the end of a nested substitution, revert to the outer level 397653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis string. Nesting only happens one or two levels deep, and the inserted string 397753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis is always zero terminated. 
*/ 3978f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 397953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_NULL && cb->nestptr[0] != NULL) 3980f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 398153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr = cb->nestptr[0]; 398253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->nestptr[0] = cb->nestptr[1]; 398353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->nestptr[1] = NULL; 3984f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich c = *ptr; 3985f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 3986f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3987f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If we are in the pre-compile phase, accumulate the length used for the 3988f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous cycle of this loop. */ 3989f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 3990f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lengthptr != NULL) 3991f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 399253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (code > cb->start_workspace + cb->workspace_size - 3993f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */ 3994f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 399553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = (code >= cb->start_workspace + cb->workspace_size)? 
399653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ERR52 : ERR86; 3997f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 3998f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 3999f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4000f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* There is at least one situation where code goes backwards: this is the 4001f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case of a zero quantifier after a class (e.g. [ab]{0}). At compile time, 4002f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich the class is simply eliminated. However, it is created first, so we have to 4003f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich allow memory for it. Therefore, don't ever reduce the length at this point. 4004f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich */ 4005f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4006f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (code < last_code) code = last_code; 4007f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4008f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Paranoid check for integer overflow */ 4009f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 401053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (OFLOW_MAX - *lengthptr < (size_t)(code - last_code)) 4011f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4012f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR20; 4013f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 4014f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 40158b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis *lengthptr += (size_t)(code - last_code); 4016f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4017f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If "previous" is set and it is not at the start of the work space, move 4018f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich it 
back to there, in order to avoid filling up the work space. Otherwise, 4019f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if "previous" is NULL, reset the current code pointer to the start. */ 4020f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4021f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (previous != NULL) 4022f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4023f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (previous > orig_code) 4024f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 40258b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis memmove(orig_code, previous, (size_t)CU2BYTES(code - previous)); 4026f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code -= previous - orig_code; 4027f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous = orig_code; 4028f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4029f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4030f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else code = orig_code; 4031f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4032f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Remember where this code item starts so we can pick up the length 4033f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich next time round. */ 4034f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich last_code = code; 4036f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4037f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 403853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Before doing anything else we must handle all the special items that do 403953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis nothing, and which may come between an item and its quantifier. 
Otherwise, 404053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis when auto-callouts are enabled, a callout gets incorrectly inserted before 404153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the quantifier is recognized. After recognizing a "do nothing" item, restart 404253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the loop in case another one follows. */ 4043f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 404453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If c is not NULL we are not at the end of the pattern. If it is NULL, we 404553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis may still be in the pattern with a NULL data item. In these cases, if we are 404653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis in \Q...\E, check for the \E that ends the literal string; if not, we have a 404753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis literal character. If not in \Q...\E, an isolated \E is ignored. */ 4048f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 404953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c != CHAR_NULL || ptr < cb->end_pattern) 4050f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4051f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) 4052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4053f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich inescq = FALSE; 4054f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 4055f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich continue; 4056f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 405753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (inescq) /* Literal character */ 4058f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4059f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (previous_callout != NULL) 4060f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 
4061f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lengthptr == NULL) /* Don't attempt in pre-compile phase */ 406253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis complete_callout(previous_callout, ptr, cb); 4063f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous_callout = NULL; 4064f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 406553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_AUTO_CALLOUT) != 0) 4066f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4067f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous_callout = code; 406853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code = auto_callout(code, ptr, cb); 4069f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4070f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto NORMAL_CHAR; 4071f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 407253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 407353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Check for the start of a \Q...\E sequence. We must do this here rather 407453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis than later in case it is immediately followed by \E, which turns it into a 407553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis "do nothing" sequence. 
*/ 407653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 407753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q) 407853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 407953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis inescq = TRUE; 408053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 408153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis continue; 408253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 4083f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4084f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 408553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* In extended mode, skip white space and #-comments that end at newline. */ 4086f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 408753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_EXTENDED) != 0) 4088f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 408953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR wscptr = ptr; 409053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) c = *(++ptr); 409153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_NUMBER_SIGN) 4092f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4093f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 409453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (ptr < cb->end_pattern) 4095f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4096f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */ 409753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { /* IS_NEWLINE sets cb->nllen. 
*/ 409853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += cb->nllen; 4099f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 4100f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4101f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 410253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 4103f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (utf) FORWARDCHAR(ptr); 4104f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 4105f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4106f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4107f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 410853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If we skipped any characters, restart the loop. Otherwise, we didn't see 410953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis a comment. */ 4110f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 41118b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis if (ptr > wscptr) goto REDO_LOOP; 411253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 4113f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 411453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Skip over (?# comments. 
*/ 4115f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 411653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK && 411753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr[2] == CHAR_NUMBER_SIGN) 4118f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 411953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 3; 412053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; 412153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr != CHAR_RIGHT_PARENTHESIS) 412253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 412353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR18; 412453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 412553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 412653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis continue; 4127f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4128f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 412953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* End of processing "do nothing" items. See if the next thing is a 413053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis quantifier. 
*/ 413153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 413253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis is_quantifier = 413353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK || 413453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1)); 413553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 413653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Fill in length of a previous callout and create an auto callout if 413753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis required, except when the next thing is a quantifier or when processing a 413853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis property substitution string for \w etc in UCP mode. */ 4139f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 414053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!is_quantifier && cb->nestptr[0] == NULL) 4141f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 414253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (previous_callout != NULL && after_manual_callout-- <= 0) 414353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 414453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (lengthptr == NULL) /* Don't attempt in pre-compile phase */ 414553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis complete_callout(previous_callout, ptr, cb); 414653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis previous_callout = NULL; 414753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 414853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 414953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_AUTO_CALLOUT) != 0) 415053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 415153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis previous_callout = code; 
415253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code = auto_callout(code, ptr, cb); 415353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 4154f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4155f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4156f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Process the next pattern item. */ 4157f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4158f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich switch(c) 4159f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4160f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ===================================================================*/ 416153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* The branch terminates at string end or | or ) */ 416253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 416353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_NULL: 416453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr < cb->end_pattern) goto NORMAL_CHAR; /* Zero data character */ 416553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Fall through */ 416653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 416753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_VERTICAL_LINE: 4168f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_RIGHT_PARENTHESIS: 416953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *firstcuptr = firstcu; 417053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *firstcuflagsptr = firstcuflags; 417153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *reqcuptr = reqcu; 417253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *reqcuflagsptr = reqcuflags; 4173f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *codeptr = code; 4174f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *ptrptr = ptr; 4175f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lengthptr != NULL) 
4176f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 417753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (OFLOW_MAX - *lengthptr < (size_t)(code - last_code)) 4178f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4179f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR20; 4180f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 4181f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 41828b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis *lengthptr += (size_t)(code - last_code); /* To include callout length */ 4183f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4184f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich return TRUE; 4185f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4186f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4187f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ===================================================================*/ 4188f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle single-character metacharacters. In multiline mode, ^ disables 4189f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich the setting of any following char as a first character. 
*/ 4190f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4191f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_CIRCUMFLEX_ACCENT: 4192f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous = NULL; 419353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_MULTILINE) != 0) 4194f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 419553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags == REQ_UNSET) 419653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcuflags = firstcuflags = REQ_NONE; 4197f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_CIRCM; 4198f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4199f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else *code++ = OP_CIRC; 4200f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 4201f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4202f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_DOLLAR_SIGN: 4203f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous = NULL; 420453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *code++ = ((options & PCRE2_MULTILINE) != 0)? OP_DOLLM : OP_DOLL; 4205f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 4206f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4207f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* There can never be a first char if '.' is first, whatever happens about 420853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis repeats. The value of reqcu doesn't change either. 
*/ 4209f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4210f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_DOT: 421153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; 421253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcu = firstcu; 421353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcuflags = firstcuflags; 421453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zeroreqcu = reqcu; 421553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zeroreqcuflags = reqcuflags; 4216f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous = code; 421753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *code++ = ((options & PCRE2_DOTALL) != 0)? OP_ALLANY: OP_ANY; 4218f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 4219f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4220f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4221f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ===================================================================*/ 4222f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Character classes. If the included characters are all < 256, we build a 4223f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 32-byte bitmap of the permitted characters, except in the special case 4224f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich where there is only one such character. For negated classes, we build the 4225f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich map as usual, then invert it at the end. However, we use a different opcode 4226f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich so that data characters > 255 can be handled correctly. 
4227f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4228f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich If the class contains characters outside the 0-255 range, a different 4229f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich opcode is compiled. It may optionally have a bit map for characters < 256, 4230f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich but those above are explicitly listed afterwards. A flag byte tells 4231f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich whether the bitmap is present, and whether this is a negated class or not. 4232f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 423353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis An isolated ']' character is not treated specially, so is just another data 423453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis character. In earlier versions of PCRE that used the original API there was 423553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis a "JavaScript compatibility mode" in which it gave an error. However, 423653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis JavaScript itself has changed in this respect so there is no longer any 423753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis need for this special handling. 4238f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 423953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis In another (POSIX) regex library, the ugly syntax [[:<:]] and [[:>:]] is 4240f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich used for "start of word" and "end of word". As these are otherwise illegal 4241f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich sequences, we don't break anything by recognizing them. They are replaced 424253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis by \b(?=\w) and \b(?<=\w) respectively. 
This can only happen at the top 424353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis nesting level, as no other inserted sequences will contains these oddities. 424453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis Sequences like [a[:<:]] are erroneous and are handled by the normal code 424553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis below. */ 4246f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4247f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_LEFT_SQUARE_BRACKET: 424853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0) 4249f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 425053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->nestptr[0] = ptr + 7; 42518b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis ptr = sub_start_of_word; 42528b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis goto REDO_LOOP; 4253f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4254f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 425553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0) 4256f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 425753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->nestptr[0] = ptr + 7; 42588b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis ptr = sub_end_of_word; 42598b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis goto REDO_LOOP; 4260f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4261f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4262f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle a real character class. 
*/ 4263f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4264f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous = code; 4265f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4266f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* PCRE supports POSIX class stuff inside a class. Perl gives an error if 4267f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich they are encountered at the top level, so we'll do that too. */ 4268f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4269f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || 4270f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr[1] == CHAR_EQUALS_SIGN) && 4271f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich check_posix_syntax(ptr, &tempptr)) 4272f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 427353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR12 : ERR13; 4274f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 4275f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4276f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4277f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If the first character is '^', set the negation flag and skip it. Also, 4278f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if the first few characters (either before or after ^) are \Q\E or \E we 4279f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich skip them too. This makes for compatibility with Perl. 
*/ 4280f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4281f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich negate_class = FALSE; 4282f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (;;) 4283f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4284f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich c = *(++ptr); 4285f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (c == CHAR_BACKSLASH) 4286f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4287f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (ptr[1] == CHAR_E) 4288f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 428953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (PRIV(strncmp_c8)(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0) 4290f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr += 3; 4291f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 4292f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 4293f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4294f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT) 4295f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich negate_class = TRUE; 4296f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else break; 4297f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4298f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 429953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Empty classes are allowed if PCRE2_ALLOW_EMPTY_CLASS is set. Otherwise, 4300f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich an initial ']' is taken as a data character -- the code below handles 430153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis that. When empty classes are allowed, [] must always fail, so generate 430253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis OP_FAIL, whereas [^] must match any character, so generate OP_ALLANY. 
*/ 4303f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4304f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (c == CHAR_RIGHT_SQUARE_BRACKET && 430553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0) 4306f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4307f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = negate_class? OP_ALLANY : OP_FAIL; 430853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; 430953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcu = firstcu; 431053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcuflags = firstcuflags; 4311f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 4312f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4313f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 431453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If a non-extended class contains a negative special such as \S, we need 431553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis to flip the negation flag at the end, so that support for characters > 255 431653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis works correctly (they are all included in the class). An extended class may 431753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis need to insert specific matching or non-matching code for wide characters. 431853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis */ 4319f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 432053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis should_flip_negation = match_all_or_no_wide_chars = FALSE; 4321f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4322f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Extended class (xclass) will be used when characters > 255 4323f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich might match. 
*/ 4324f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 432553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS 4326f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich xclass = FALSE; 4327f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich class_uchardata = code + LINK_SIZE + 2; /* For XCLASS items */ 4328f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich class_uchardata_base = class_uchardata; /* Save the start */ 4329f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 4330f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4331f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* For optimization purposes, we track some properties of the class: 433253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis class_has_8bitchar will be non-zero if the class contains at least one 256 433353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis character with a code point less than 256; class_one_char will be 1 if the 433453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis class contains just one character; xclass_has_prop will be TRUE if Unicode 433553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis property checks are present in the class. */ 4336f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4337f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich class_has_8bitchar = 0; 4338f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich class_one_char = 0; 433953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS 4340f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich xclass_has_prop = FALSE; 4341f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 4342f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 434353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Initialize the 256-bit (32-byte) bit map to all zeros. 
We build the map 434453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis in a temporary bit of memory, in case the class contains fewer than two 4345f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8-bit characters because in that case the compiled code doesn't use the bit 4346f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich map. */ 4347f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 434853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memset(classbits, 0, 32 * sizeof(uint8_t)); 4349f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 435053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Process characters until ] is reached. As the test is at the end of the 435153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis loop, an initial ] is taken as a data character. At the start of the loop, 435253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c contains the first code unit of the character. If it is zero, check for 435353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the end of the pattern, to allow binary zero as data. 
*/ 4354f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 435553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for(;;) 4356f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 435753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR oldptr; 435853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef EBCDIC 435953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis BOOL range_is_literal = TRUE; 436053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 436153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 436253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_NULL && ptr >= cb->end_pattern) 436353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 436453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR6; /* Missing terminating ']' */ 436553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 436653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 4367f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 436853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 4369f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (utf && HAS_EXTRALEN(c)) 4370f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { /* Braces are required because the */ 4371f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ 4372f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4373f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 4374f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4375f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Inside \Q...\E everything is literal except \E */ 4376f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4377f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (inescq) 4378f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4379f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 
if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E) /* If we are at \E */ 4380f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4381f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich inescq = FALSE; /* Reset literal state */ 4382f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; /* Skip the 'E' */ 438353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto CONTINUE_CLASS; /* Carry on with next char */ 4384f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4385f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto CHECK_RANGE; /* Could be range if \E follows */ 4386f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4387f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4388f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle POSIX class names. Perl allows a negation extension of the 4389f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich form [:^name:]. A square bracket that doesn't match the syntax is 4390f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich treated as a literal. We also recognize the POSIX constructions 4391f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich [.ch.] and [=ch=] ("collating elements") and fault them, as Perl 4392f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5.6 and 5.8 do. 
*/ 4393f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4394f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (c == CHAR_LEFT_SQUARE_BRACKET && 4395f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || 4396f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr)) 4397f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4398f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich BOOL local_negate = FALSE; 4399f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int posix_class, taboffset, tabopt; 440053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis register const uint8_t *cbits = cb->cbits; 440153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint8_t pbits[32]; 4402f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4403f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (ptr[1] != CHAR_COLON) 4404f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 440553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR13; 4406f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 4407f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4408f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4409f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr += 2; 4410f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*ptr == CHAR_CIRCUMFLEX_ACCENT) 4411f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4412f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich local_negate = TRUE; 4413f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich should_flip_negation = TRUE; /* Note negative special */ 4414f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 4415f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4416f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4417f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich posix_class = 
check_posix_name(ptr, (int)(tempptr - ptr)); 4418f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (posix_class < 0) 4419f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4420f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR30; 4421f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 4422f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4423f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4424f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If matching is caseless, upper and lower are converted to 4425f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich alpha. This relies on the fact that the class table starts with 4426f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich alpha, lower, upper as the first 3 entries. */ 4427f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 442853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2) 4429f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich posix_class = 0; 4430f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 443153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* When PCRE2_UCP is set, some of the POSIX classes are converted to 4432f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich different escape sequences that use Unicode properties \p or \P. Others 4433f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP 443453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis directly. 
UCP support is not available unless UTF support is.*/ 4435f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 443653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 443753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_UCP) != 0) 4438f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4439f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich unsigned int ptype = 0; 4440f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0); 4441f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4442f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* The posix_substitutes table specifies which POSIX classes can be 444353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis converted to \p or \P items. This can only happen at top nestling 444453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis level, as there will never be a POSIX class in a string that is 444553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis substituted for something else. */ 4446f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4447f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (posix_substitutes[pc] != NULL) 4448f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 444953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->nestptr[0] = tempptr + 1; 4450f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr = posix_substitutes[pc] - 1; 445153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto CONTINUE_CLASS; 4452f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4453f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4454f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* There are three other classes that generate special property calls 4455f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich that are recognized only in an XCLASS. 
*/ 4456f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4457f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else switch(posix_class) 4458f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4459f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case PC_GRAPH: 4460f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptype = PT_PXGRAPH; 4461f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Fall through */ 4462f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case PC_PRINT: 4463f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (ptype == 0) ptype = PT_PXPRINT; 4464f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Fall through */ 4465f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case PC_PUNCT: 4466f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (ptype == 0) ptype = PT_PXPUNCT; 4467f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP; 44688b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis *class_uchardata++ = (PCRE2_UCHAR)ptype; 4469f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *class_uchardata++ = 0; 4470f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich xclass_has_prop = TRUE; 4471f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr = tempptr + 1; 447253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto CONTINUE_CLASS; 4473f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 44740ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes /* For the other POSIX classes (ascii, xdigit) we are going to fall 44750ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes through to the non-UCP case and build a bit map for characters with 447653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code points less than 256. 
However, if we are in a negated POSIX 447753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis class, characters with code points greater than 255 must either all 447853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis match or all not match, depending on whether the whole class is not 447953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis or is negated. For example, for [[:^ascii:]... they must all match, 448053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis whereas for [^[:^xdigit:]... they must not. 448153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 448253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis In the special case where there are no xclass items, this is 448353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis automatically handled by the use of OP_CLASS or OP_NCLASS, but an 448453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis explicit range is needed for OP_XCLASS. Setting a flag here causes 448553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the range to be generated later when it is known that OP_XCLASS is 448653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis required. 
*/ 4487f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4488f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich default: 448953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis match_all_or_no_wide_chars |= local_negate; 4490f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 4491f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4492f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 449353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* SUPPORT_UNICODE */ 449453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 4495f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* In the non-UCP case, or when UCP makes no difference, we build the 4496f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bit map for the POSIX class in a chunk of local store because we may be 4497f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich adding and subtracting from it, and we don't want to subtract bits that 4498f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich may be in the main map already. At the end we or the result into the 4499f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bit map that is being built. */ 4500f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4501f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich posix_class *= 3; 4502f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4503f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Copy in the first table (always present) */ 4504f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4505f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich memcpy(pbits, cbits + posix_class_maps[posix_class], 450653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 32 * sizeof(uint8_t)); 4507f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4508f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If there is a second table, add or remove it as required. 
*/ 4509f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4510f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich taboffset = posix_class_maps[posix_class + 1]; 4511f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich tabopt = posix_class_maps[posix_class + 2]; 4512f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4513f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (taboffset >= 0) 4514f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4515f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (tabopt >= 0) 45168b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis for (c = 0; c < 32; c++) pbits[c] |= cbits[(int)c + taboffset]; 4517f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 45188b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis for (c = 0; c < 32; c++) pbits[c] &= ~cbits[(int)c + taboffset]; 4519f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4520f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4521f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Now see if we need to remove any special characters. An option 4522f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich value of 1 removes vertical space and 2 removes underscore. */ 4523f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4524f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (tabopt < 0) tabopt = -tabopt; 4525f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (tabopt == 1) pbits[1] &= ~0x3c; 4526f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (tabopt == 2) pbits[11] &= 0x7f; 4527f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4528f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Add the POSIX table or its complement into the main table that is 4529f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich being built and we are done. 
*/ 4530f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4531f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (local_negate) 4532f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c]; 4533f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 4534f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= pbits[c]; 4535f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4536f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr = tempptr + 1; 4537f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Every class contains at least one < 256 character. */ 4538f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich class_has_8bitchar = 1; 4539f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Every class contains at least two characters. */ 4540f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich class_one_char = 2; 454153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto CONTINUE_CLASS; /* End of POSIX syntax handling */ 4542f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4543f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4544f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Backslash may introduce a single character, or it may introduce one 4545f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich of the specials, which just set a flag. The sequence \b is a special 4546f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case. Inside a class (and only there) it is treated as backspace. We 4547f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich assume that other escapes have more than one character in them, so 4548f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich speculatively set both class_has_8bitchar and class_one_char bigger 454953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis than one. Unrecognized escapes fall through and are faulted. 
*/ 4550f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4551f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (c == CHAR_BACKSLASH) 4552f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 455353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis escape = PRIV(check_escape)(&ptr, cb->end_pattern, &ec, errorcodeptr, 455453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis options, TRUE, cb); 4555f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*errorcodeptr != 0) goto FAILED; 455653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (escape == 0) /* Escaped single char */ 455753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 455853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = ec; 455953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef EBCDIC 456053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis range_is_literal = FALSE; 456153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 456253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 4563f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */ 4564f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (escape == ESC_N) /* \N is not supported in a class */ 4565f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4566f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR71; 4567f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 4568f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4569f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (escape == ESC_Q) /* Handle start of quoted string */ 4570f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4571f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) 4572f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4573f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr += 
2; /* avoid empty string */ 4574f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4575f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else inescq = TRUE; 457653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto CONTINUE_CLASS; 4577f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 457853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (escape == ESC_E) goto CONTINUE_CLASS; /* Ignore orphan \E */ 4579f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 458053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else /* Handle \d-type escapes */ 4581f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 458253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis register const uint8_t *cbits = cb->cbits; 4583f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Every class contains at least two < 256 characters. */ 4584f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich class_has_8bitchar++; 4585f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Every class contains at least two characters. */ 4586f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich class_one_char += 2; 4587f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4588f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich switch (escape) 4589f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 459053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 4591f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_du: /* These are the values given for \d etc */ 459253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case ESC_DU: /* when PCRE2_UCP is set. 
We replace the */ 4593f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_wu: /* escape sequence with an appropriate \p */ 4594f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_WU: /* or \P to test Unicode properties instead */ 459553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case ESC_su: /* of the default ASCII testing. This might be */ 459653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case ESC_SU: /* a 2nd-level nesting for [[:<:]] or [[:>:]]. */ 459753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->nestptr[1] = cb->nestptr[0]; 459853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->nestptr[0] = ptr; 4599f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr = substitutes[escape - ESC_DU] - 1; /* Just before substitute */ 4600f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich class_has_8bitchar--; /* Undo! */ 460153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 4602f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 4603f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_d: 4604f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit]; 460553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 4606f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4607f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_D: 4608f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich should_flip_negation = TRUE; 4609f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit]; 461053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 4611f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4612f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_w: 4613f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word]; 
461453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 4615f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4616f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_W: 4617f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich should_flip_negation = TRUE; 4618f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word]; 461953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 4620f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4621f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl 4622f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5.18. Before PCRE 8.34, we had to preserve the VT bit if it was 4623f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previously set by something earlier in the character class. 4624f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so 4625f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich we could just adjust the appropriate bit. From PCRE 8.34 we no 4626f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich longer treat \s and \S specially. 
*/ 4627f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4628f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_s: 4629f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space]; 463053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 4631f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4632f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_S: 4633f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich should_flip_negation = TRUE; 4634f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space]; 463553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 4636f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4637f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* The rest apply in both UCP and non-UCP cases. */ 4638f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4639f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_h: 464053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (void)add_list_to_class(classbits, &class_uchardata, options, cb, 4641f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PRIV(hspace_list), NOTACHAR); 464253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 4643f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4644f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_H: 4645f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich (void)add_not_list_to_class(classbits, &class_uchardata, options, 464653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb, PRIV(hspace_list)); 464753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 4648f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4649f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_v: 465053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (void)add_list_to_class(classbits, &class_uchardata, options, cb, 
4651f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PRIV(vspace_list), NOTACHAR); 465253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 4653f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4654f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_V: 4655f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich (void)add_not_list_to_class(classbits, &class_uchardata, options, 465653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb, PRIV(vspace_list)); 465753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 4658f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4659f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_p: 4660f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case ESC_P: 466153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 4662f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4663f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich BOOL negated; 4664f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich unsigned int ptype = 0, pdata = 0; 466553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr, cb)) 4666f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 4667f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *class_uchardata++ = ((escape == ESC_p) != negated)? 4668f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich XCL_PROP : XCL_NOTPROP; 4669f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *class_uchardata++ = ptype; 4670f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *class_uchardata++ = pdata; 4671f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich xclass_has_prop = TRUE; 4672f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich class_has_8bitchar--; /* Undo! 
*/ 4673f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 467453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 46750ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes#else 46760ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes *errorcodeptr = ERR45; 46770ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes goto FAILED; 4678f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 467953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Unrecognized escapes are faulted. */ 4680f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4681f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich default: 468253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR7; 468353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 4684f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 468553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 468653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Handled \d-type escape */ 468753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 468853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto CONTINUE_CLASS; 4689f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4690f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 469153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Control gets here if the escape just defined a single character. 469253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis This is in c and may be greater than 256. */ 4693f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4694f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich escape = 0; 4695f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } /* End of backslash handling */ 4696f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4697f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* A character may be followed by '-' to form a range. 
However, Perl does 4698f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich not permit ']' to be the end of the range. A '-' character at the end is 4699f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich treated as a literal. Perl ignores orphaned \E sequences entirely. The 4700f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code for handling \Q and \E is messy. */ 4701f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4702f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHECK_RANGE: 4703f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) 4704f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4705f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich inescq = FALSE; 4706f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr += 2; 4707f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4708f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich oldptr = ptr; 4709f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4710f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Remember if \r or \n were explicitly used */ 4711f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 471253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF; 4713f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4714f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Check for range */ 4715f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4716f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (!inescq && ptr[1] == CHAR_MINUS) 4717f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 471853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t d; 4719f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr += 2; 4720f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2; 4721f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich 4722f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If we hit \Q (not followed by \E) at this point, go into escaped 4723f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich mode. */ 4724f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4725f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q) 4726f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4727f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr += 2; 4728f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) 4729f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { ptr += 2; continue; } 4730f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich inescq = TRUE; 4731f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 4732f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4733f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4734f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Minus (hyphen) at the end of a class is treated as a literal, so put 4735f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich back the pointer and jump to handle the character that preceded it. 
*/ 4736f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4737f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*ptr == CHAR_NULL || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET)) 4738f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4739f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr = oldptr; 4740f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto CLASS_SINGLE_CHARACTER; 4741f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4742f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4743f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Otherwise, we have a potential range; pick up the next character */ 4744f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 474553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 4746f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (utf) 4747f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { /* Braces are required because the */ 4748f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */ 4749f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4750f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 4751f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 475253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis d = *ptr; /* Not UTF mode */ 4753f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4754f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* The second part of a range can be a single-character escape 4755f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich sequence, but not any of the other escapes. Perl treats a hyphen as a 4756f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich literal in such circumstances. 
However, in Perl's warning mode, a 4757f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich warning is given, so PCRE now faults it as it is almost certainly a 4758f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich mistake on the user's part. */ 4759f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4760f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (!inescq) 4761f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4762f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (d == CHAR_BACKSLASH) 4763f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4764f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int descape; 476553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis descape = PRIV(check_escape)(&ptr, cb->end_pattern, &d, 476653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcodeptr, options, TRUE, cb); 4767f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*errorcodeptr != 0) goto FAILED; 476853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef EBCDIC 476953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis range_is_literal = FALSE; 477053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 4771f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* 0 means a character was put into d; \b is backspace; any other 4772f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich special causes an error. 
*/ 4773f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4774f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (descape != 0) 4775f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4776f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (descape == ESC_b) d = CHAR_BS; else 4777f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 477853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR50; 4779f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 4780f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4781f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4782f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4783f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4784f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* A hyphen followed by a POSIX class is treated in the same way. */ 4785f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4786f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (d == CHAR_LEFT_SQUARE_BRACKET && 4787f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT || 4788f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr[1] == CHAR_EQUALS_SIGN) && 4789f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich check_posix_syntax(ptr, &tempptr)) 4790f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 479153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR50; 4792f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 4793f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4794f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4795f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4796f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Check that the two values are in the correct order. Optimize 4797f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich one-character ranges. 
*/ 4798f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4799f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (d < c) 4800f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4801f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR8; 4802f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 4803f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4804f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (d == c) goto CLASS_SINGLE_CHARACTER; /* A few lines below */ 4805f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4806f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* We have found a character range, so single character optimizations 4807f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cannot be done anymore. Any value greater than 1 indicates that there 4808f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich is more than one character. */ 4809f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4810f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich class_one_char = 2; 4811f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4812f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Remember an explicit \r or \n, and add the range to the class. */ 4813f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 481453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF; 4815f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 481653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* In an EBCDIC environment, Perl treats alphabetic ranges specially 481753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis because there are holes in the encoding, and simply using the range A-Z 481853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (for example) would include the characters in the holes. 
This applies 481953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */ 482053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 482153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef EBCDIC 482253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (range_is_literal && 482353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (cb->ctypes[c] & ctype_letter) != 0 && 482453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (cb->ctypes[d] & ctype_letter) != 0 && 482553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (c <= CHAR_z) == (d <= CHAR_z)) 482653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 482753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t uc = (c <= CHAR_z)? 0 : 64; 482853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t C = c - uc; 482953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t D = d - uc; 483053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 483153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (C <= CHAR_i) 483253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 483353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis class_has_8bitchar += 483453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis add_to_class(classbits, &class_uchardata, options, cb, C + uc, 483553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ((D < CHAR_i)? 
D : CHAR_i) + uc); 483653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis C = CHAR_j; 483753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 483853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 483953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (C <= D && C <= CHAR_r) 484053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 484153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis class_has_8bitchar += 484253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis add_to_class(classbits, &class_uchardata, options, cb, C + uc, 484353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ((D < CHAR_r)? D : CHAR_r) + uc); 484453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis C = CHAR_s; 484553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 4846f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 484753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (C <= D) 484853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 484953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis class_has_8bitchar += 485053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis add_to_class(classbits, &class_uchardata, options, cb, C + uc, 485153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis D + uc); 485253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 485353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 485453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 485553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 485653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis class_has_8bitchar += 485753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis add_to_class(classbits, &class_uchardata, options, cb, c, d); 485853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto CONTINUE_CLASS; /* Go get the next char in the class */ 4859f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4860f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich 4861f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle a single character - we can get here for a normal non-escape 4862f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich char, or after \ that introduces a single character or for an apparent 4863f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich range that isn't. Only the value 1 matters for class_one_char, so don't 4864f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich increase it if it is already 2 or more ... just in case there's a class 4865f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich with a zillion characters in it. */ 4866f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4867f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CLASS_SINGLE_CHARACTER: 4868f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (class_one_char < 2) class_one_char++; 4869f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 487053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If class_one_char is 1 and xclass_has_prop is false, we have the first 48710ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes single character in the class, and there have been no prior ranges, or 48720ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes XCLASS items generated by escapes. If this is the final character in the 48730ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes class, we can optimize by turning the item into a 1-character OP_CHAR[I] 48740ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes if it's positive, or OP_NOT[I] if it's negative. In the positive case, it 487553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis can cause firstcu to be set. Otherwise, there can be no first char if 48760ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes this item is first, whatever repeat count may follow. In the case of 487753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcu, save the previous value for reinstating. 
*/ 4878f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 48790ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes if (!inescq && 488053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 48810ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes !xclass_has_prop && 48820ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes#endif 48830ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) 4884f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4885f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 488653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zeroreqcu = reqcu; 488753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zeroreqcuflags = reqcuflags; 4888f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4889f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (negate_class) 4890f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 489153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 4892f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int d; 4893f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 489453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; 489553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcu = firstcu; 489653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcuflags = firstcuflags; 4897f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 489853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* For caseless UTF mode, check whether this character has more than 489953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis one other case. If so, generate a special OP_NOTPROP item instead of 490053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis OP_NOTI. 
*/ 4901f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 490253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 490353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf && (options & PCRE2_CASELESS) != 0 && 4904f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich (d = UCD_CASESET(c)) != 0) 4905f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 4906f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_NOTPROP; 4907f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = PT_CLIST; 4908f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = d; 4909f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4910f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 4911f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 4912f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Char has only one other case, or UCP not available */ 4913f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4914f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 491553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *code++ = ((options & PCRE2_CASELESS) != 0)? OP_NOTI: OP_NOT; 491653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code += PUTCHAR(c, code); 4917f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4918f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4919f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* We are finished with this character class */ 4920f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4921f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto END_CLASS; 4922f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 4923f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4924f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* For a single, positive character, get the value into mcbuffer, and 4925f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich then we can handle this with the normal one-character code. 
*/ 4926f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 492753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis mclength = PUTCHAR(c, mcbuffer); 4928f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto ONE_CHAR; 4929f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } /* End of 1-char optimization */ 4930f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4931f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* There is more than one character in the class, or an XCLASS item 4932f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich has been generated. Add this character to the class. */ 4933f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4934f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich class_has_8bitchar += 493553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis add_to_class(classbits, &class_uchardata, options, cb, c, c); 4936f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 493753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Continue to the next character in the class. Closing square bracket 493853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis not within \Q..\E ends the class. A NULL character terminates a 493953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis nested substitution string, but may be a data character in the main 494053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis pattern (tested at the start of this loop). 
*/ 4941f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 494253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis CONTINUE_CLASS: 494353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = *(++ptr); 494453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_NULL && cb->nestptr[0] != NULL) 494553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 494653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr = cb->nestptr[0]; 494753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->nestptr[0] = cb->nestptr[1]; 494853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->nestptr[1] = NULL; 494953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = *(++ptr); 495053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 4951f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 495253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS 495353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If any wide characters have been encountered, set xclass = TRUE. Then, 495453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis in the pre-compile phase, accumulate the length of the wide characters 495553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis and reset the pointer. This is so that very large classes that contain a 495653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zillion wide characters do not overwrite the work space (which is on the 495753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis stack). 
*/ 4958f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 495953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (class_uchardata > class_uchardata_base) 496053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 496153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis xclass = TRUE; 496253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (lengthptr != NULL) 496353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 496453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *lengthptr += class_uchardata - class_uchardata_base; 496553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis class_uchardata = class_uchardata_base; 496653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 496753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 4968f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 496953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* An unescaped ] ends the class */ 497053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 497153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break; 497253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } /* End of main class-processing loop */ 4973f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 4974f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If this is the first thing in the branch, there can be no first char 497553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis setting, whatever the repeat count. Any reqcu setting must remain 4976f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich unchanged after any kind of repeat. 
*/ 4977f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 497853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; 497953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcu = firstcu; 498053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcuflags = firstcuflags; 498153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zeroreqcu = reqcu; 498253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zeroreqcuflags = reqcuflags; 498353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 498453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If there are characters with values > 255, or Unicode property settings 498553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (\p or \P), we have to compile an extended class, with its own opcode, 498653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis unless there were no property settings and there was a negated special such 498753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis as \S in the class, and PCRE2_UCP is not set, because in that case all 498853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis characters > 255 are in or not in the class, so any that were explicitly 498953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis given as well can be ignored. 499053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 499153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis In the UCP case, if certain negated POSIX classes ([:^ascii:] or 499253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis [:^xdigit:]) were present in a class, we either have to match or not match 499353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis all wide characters (depending on whether the whole class is or is not 499453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis negated). 
This requirement is indicated by match_all_or_no_wide_chars being 499553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis true. We do this by including an explicit range, which works in both cases. 499653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 499753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis If, when generating an xclass, there are no characters < 256, we can omit 499853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the bitmap in the actual compiled code. */ 499953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 500053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS 500153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 50020ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes if (xclass && (xclass_has_prop || !should_flip_negation || 500353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (options & PCRE2_UCP) != 0)) 500453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#elif PCRE2_CODE_UNIT_WIDTH != 8 50050ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes if (xclass && (xclass_has_prop || !should_flip_negation)) 5006f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 5007f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 500853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (match_all_or_no_wide_chars) 500953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 501053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *class_uchardata++ = XCL_RANGE; 501153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis class_uchardata += PRIV(ord2utf)(0x100, class_uchardata); 501253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis class_uchardata += PRIV(ord2utf)(MAX_UTF_CODE_POINT, class_uchardata); 501353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 5014f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *class_uchardata++ = XCL_END; /* Marks the end of extra data */ 
5015f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_XCLASS; 5016f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += LINK_SIZE; 5017f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code = negate_class? XCL_NOT:0; 5018f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (xclass_has_prop) *code |= XCL_HASPROP; 5019f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5020f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If the map is required, move up the extra data to make room for it; 5021f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich otherwise just move the code pointer to the end of the extra data. */ 5022f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5023f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (class_has_8bitchar > 0) 5024f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5025f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ |= XCL_MAP; 502653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memmove(code + (32 / sizeof(PCRE2_UCHAR)), code, 502753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis CU2BYTES(class_uchardata - code)); 5028f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (negate_class && !xclass_has_prop) 5029f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (c = 0; c < 32; c++) classbits[c] = ~classbits[c]; 5030f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich memcpy(code, classbits, 32); 503153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code = class_uchardata + (32 / sizeof(PCRE2_UCHAR)); 5032f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5033f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else code = class_uchardata; 5034f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Now fill in the complete length of the item */ 5036f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 
5037f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT(previous, 1, (int)(code - previous)); 5038f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; /* End of class handling */ 5039f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5040f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 5041f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5042f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If there are no characters > 255, or they are all to be included or 5043f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the 5044f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich whole class was negated and whether there were negative specials such as \S 5045f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich (non-UCP) in the class. Then copy the 32-byte map into the code vector, 5046f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich negating it if necessary. */ 5047f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5048f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = (negate_class == should_flip_negation) ? 
OP_CLASS : OP_NCLASS; 5049f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lengthptr == NULL) /* Save time in the pre-compile phase */ 5050f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5051f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (negate_class) 5052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (c = 0; c < 32; c++) classbits[c] = ~classbits[c]; 5053f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich memcpy(code, classbits, 32); 5054f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 505553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code += 32 / sizeof(PCRE2_UCHAR); 5056f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5057f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich END_CLASS: 5058f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 5059f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5060f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5061f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ===================================================================*/ 5062f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Various kinds of repeat; '{' is not necessarily a quantifier, but this 5063f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich has been tested above. 
*/ 5064f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5065f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_LEFT_CURLY_BRACKET: 5066f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (!is_quantifier) goto NORMAL_CHAR; 5067f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr); 5068f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*errorcodeptr != 0) goto FAILED; 5069f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto REPEAT; 5070f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5071f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_ASTERISK: 5072f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeat_min = 0; 5073f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeat_max = -1; 5074f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto REPEAT; 5075f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5076f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_PLUS: 5077f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeat_min = 1; 5078f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeat_max = -1; 5079f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto REPEAT; 5080f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5081f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_QUESTION_MARK: 5082f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeat_min = 0; 5083f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeat_max = 1; 5084f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5085f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich REPEAT: 5086f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (previous == NULL) 5087f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5088f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR9; 5089f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 
5090f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5091f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5092f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_min == 0) 5093f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 509453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcu = zerofirstcu; /* Adjust for zero repeat */ 509553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcuflags = zerofirstcuflags; 509653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcu = zeroreqcu; /* Ditto */ 509753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuflags = zeroreqcuflags; 5098f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5099f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5100f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Remember whether this is a variable length repeat */ 5101f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5102f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY; 5103f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5104f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich op_type = 0; /* Default single-char op codes */ 5105f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich possessive_quantifier = FALSE; /* Default not possessive quantifier */ 5106f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5107f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Save start of previous item, in case we have to move it up in order to 5108f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich insert something before it. 
*/ 5109f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5110f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich tempcode = previous; 5111f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5112f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Before checking for a possessive quantifier, we must skip over 5113f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich whitespace and comments in extended mode because Perl allows white space at 5114f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich this point. */ 5115f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 511653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_EXTENDED) != 0) 5117f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 511853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 5119f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (;;) 5120f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 512153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_space) != 0) ptr++; 512253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr != CHAR_NUMBER_SIGN) break; 512353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 512453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (ptr < cb->end_pattern) 5125f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 512653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (IS_NEWLINE(ptr)) /* For non-fixed-length newline cases, */ 512753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { /* IS_NEWLINE sets cb->nllen. 
*/ 512853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += cb->nllen; 5129f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 5130f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 513153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 513253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 513353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf) FORWARDCHAR(ptr); 5134f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 5135f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } /* Loop for comment characters */ 5136f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } /* Loop for multiple comments */ 513753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr--; /* Last code unit of previous character. */ 5138f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5139f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5140f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If the next character is '+', we have a possessive quantifier. This 514153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis implies greediness, whatever the setting of the PCRE2_UNGREEDY option. 5142f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich If the next character is '?' this is a minimizing repeat, by default, 514353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis but if PCRE2_UNGREEDY is set, it works the other way round. We change the 5144f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeat type to the non-default. 
*/ 5145f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5146f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (ptr[1] == CHAR_PLUS) 5147f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5148f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeat_type = 0; /* Force greedy */ 5149f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich possessive_quantifier = TRUE; 5150f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 5151f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5152f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (ptr[1] == CHAR_QUESTION_MARK) 5153f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5154f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeat_type = greedy_non_default; 5155f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 5156f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5157f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else repeat_type = greedy_default; 5158f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 515953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If the repeat is {1} we can ignore it. */ 516053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 516153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT; 516253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 5163f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If previous was a recursion call, wrap it in atomic brackets so that 5164f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous becomes the atomic group. All recursions were so wrapped in the 5165f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich past, but it no longer happens for non-repeated recursions. 
In fact, the 5166f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeated ones could be re-implemented independently so as not to need this, 5167f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich but for the moment we rely on the code for repeating groups. */ 5168f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5169f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*previous == OP_RECURSE) 5170f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 517153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE)); 5172f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *previous = OP_ONCE; 5173f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT(previous, 1, 2 + 2*LINK_SIZE); 5174f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous[2 + 2*LINK_SIZE] = OP_KET; 5175f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE); 5176f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += 2 + 2 * LINK_SIZE; 5177f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich length_prevgroup = 3 + 3*LINK_SIZE; 5178f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5179f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5180f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Now handle repetition for the different types of item. */ 5181f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5182f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If previous was a character or negated character match, abolish the item 5183f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich and generate a repeat item instead. 
If a char item has a minimum of more 518453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis than one, ensure that it is set in reqcu - it might not be if a sequence 5185f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich such as x{3} is the first thing in a branch because the x will have gone 518653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis into firstcu instead. */ 5187f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5188f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*previous == OP_CHAR || *previous == OP_CHARI 5189f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich || *previous == OP_NOT || *previous == OP_NOTI) 5190f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5191f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich switch (*previous) 5192f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5193f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich default: /* Make compiler happy. */ 5194f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CHAR: op_type = OP_STAR - OP_STAR; break; 5195f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CHARI: op_type = OP_STARI - OP_STAR; break; 5196f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOT: op_type = OP_NOTSTAR - OP_STAR; break; 5197f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTI: op_type = OP_NOTSTARI - OP_STAR; break; 5198f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5199f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 520053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Deal with UTF characters that take up more than one code unit. It's 5201f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich easier to write this out separately than try to macrify it. 
Use c to 520253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis hold the length of the character in code units, plus UTF_LENGTH to flag 520353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis that it's a length rather than a small character. */ 5204f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 520553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef MAYBE_UTF_MULTI 520653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf && NOT_FIRSTCU(code[-1])) 5207f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 520853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *lastchar = code - 1; 5209f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich BACKCHAR(lastchar); 521053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = (int)(code - lastchar); /* Length of UTF character */ 521153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memcpy(utf_units, lastchar, CU2BYTES(c)); /* Save the char */ 521253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c |= UTF_LENGTH; /* Flag c as a length */ 5213f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5214f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 521553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* MAYBE_UTF_MULTI */ 5216f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5217f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle the case of a single character - either with no UTF support, or 521853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis with UTF disabled, or for a single-code-unit UTF character. 
*/ 5219f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5220f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich c = code[-1]; 5221f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*previous <= OP_CHARI && repeat_min > 1) 5222f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 522353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcu = c; 522453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuflags = req_caseopt | cb->req_varyopt; 5225f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5226f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5227f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5228f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ 5229f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5230f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5231f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If previous was a character type match (\d or similar), abolish it and 5232f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich create a suitable repeat item. The code is shared with single-character 5233f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeats by setting op_type to add a suitable offset into repeat_type. Note 523453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis that the Unicode property types will be present only when SUPPORT_UNICODE is 5235f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich defined, but we don't wrap the little bits of code here because it just 5236f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich makes it horribly messy. 
*/ 5237f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5238f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (*previous < OP_EODN) 5239f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 524053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *oldcode; 5241f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int prop_type, prop_value; 524253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ 524353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = *previous; /* Save previous opcode */ 524453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c == OP_PROP || c == OP_NOTPROP) 5245f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5246f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich prop_type = previous[1]; 5247f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich prop_value = previous[2]; 5248f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 524953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 525053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 525153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Come here from just above with a character in c */ 525253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis OUTPUT_SINGLE_REPEAT: 525353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis prop_type = prop_value = -1; 525453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 5255f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 525653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* At this point we either have prop_type == prop_value == -1 and either 525753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis a code point or a character type that is not OP_[NOT]PROP in c, or we 525853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis have OP_[NOT]PROP in c and prop_type/prop_value not negative. 
*/ 525953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 526053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis oldcode = code; /* Save where we were */ 5261f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code = previous; /* Usually overwrite previous item */ 5262f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5263f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If the maximum is zero then the minimum must also be zero; Perl allows 5264f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich this case, so we do too - by simply omitting the item altogether. */ 5265f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5266f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_max == 0) goto END_REPEAT; 5267f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5268f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Combine the op_type with the repeat_type */ 5269f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5270f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeat_type += op_type; 5271f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5272f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* A minimum of zero is handled either as the special case * or ?, or as 5273f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich an UPTO, with the maximum given. 
*/ 5274f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5275f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_min == 0) 5276f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5277f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_max == -1) *code++ = OP_STAR + repeat_type; 5278f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type; 5279f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 5280f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5281f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_UPTO + repeat_type; 5282f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT2INC(code, 0, repeat_max); 5283f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5284f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5285f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5286f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* A repeat minimum of 1 is optimized into some special cases. If the 5287f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich maximum is unlimited, we use OP_PLUS. Otherwise, the original item is 5288f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich left in place and, if the maximum is greater than 1, we use OP_UPTO with 5289f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich one less than the maximum. 
*/ 5290f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5291f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (repeat_min == 1) 5292f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5293f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_max == -1) 5294f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_PLUS + repeat_type; 5295f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 5296f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 529753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code = oldcode; /* Leave previous item in place */ 5298f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_max == 1) goto END_REPEAT; 5299f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_UPTO + repeat_type; 5300f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT2INC(code, 0, repeat_max - 1); 5301f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5302f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5303f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5304f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* The case {n,n} is just an EXACT, while the general case {n,m} is 530553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis handled as an EXACT followed by an UPTO or STAR or QUERY. 
*/ 5306f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5307f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 5308f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5309f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */ 5310f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT2INC(code, 0, repeat_min); 5311f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 531253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Unless repeat_max equals repeat_min, fill in the data for EXACT, and 531353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis then generate the second opcode. In UTF mode, multi-code-unit 531453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis characters have their length in c, with the UTF_LENGTH bit as a flag, 531553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis and the code units in utf_units. For a repeated Unicode property match, 531653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis there are two extra values that define the required property, and c 531753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis never has the UTF_LENGTH bit set. 
*/ 5318f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 531953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (repeat_max != repeat_min) 5320f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 532153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef MAYBE_UTF_MULTI 5322f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (utf && (c & UTF_LENGTH) != 0) 5323f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 532453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memcpy(code, utf_units, CU2BYTES(c & 7)); 5325f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += c & 7; 5326f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5327f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 532853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* MAYBE_UTF_MULTI */ 5329f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5330f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = c; 5331f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (prop_type >= 0) 5332f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5333f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = prop_type; 5334f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = prop_value; 5335f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5336f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5337f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 533853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Now set up the following opcode */ 5339f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 534053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (repeat_max < 0) *code++ = OP_STAR + repeat_type; else 5341f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 534253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis repeat_max -= repeat_min; 534353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (repeat_max == 1) 
534453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 534553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *code++ = OP_QUERY + repeat_type; 534653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 534753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 534853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 534953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *code++ = OP_UPTO + repeat_type; 535053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PUT2INC(code, 0, repeat_max); 535153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 5352f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5353f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5354f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5355f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 535653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Fill in the character or character type for the final opcode. */ 5357f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 535853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef MAYBE_UTF_MULTI 5359f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (utf && (c & UTF_LENGTH) != 0) 5360f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 536153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memcpy(code, utf_units, CU2BYTES(c & 7)); 5362f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += c & 7; 5363f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5364f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 536553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* MAYBE_UTF_MULTI */ 5366f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 536753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *code++ = c; 536853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (prop_type >= 0) 536953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 
537053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *code++ = prop_type; 537153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *code++ = prop_value; 537253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 5373f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5374f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5375f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5376f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If previous was a character class or a back reference, we put the repeat 5377f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich stuff after it, but just skip the item if the repeat was {0,0}. */ 5378f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5379f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (*previous == OP_CLASS || *previous == OP_NCLASS || 538053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS 5381f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *previous == OP_XCLASS || 5382f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 5383f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *previous == OP_REF || *previous == OP_REFI || 5384f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *previous == OP_DNREF || *previous == OP_DNREFI) 5385f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5386f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_max == 0) 5387f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5388f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code = previous; 5389f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto END_REPEAT; 5390f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5391f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5392f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_min == 0 && repeat_max == -1) 5393f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_CRSTAR + repeat_type; 
5394f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (repeat_min == 1 && repeat_max == -1) 5395f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_CRPLUS + repeat_type; 5396f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (repeat_min == 0 && repeat_max == 1) 5397f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_CRQUERY + repeat_type; 5398f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 5399f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5400f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_CRRANGE + repeat_type; 5401f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT2INC(code, 0, repeat_min); 5402f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */ 5403f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT2INC(code, 0, repeat_max); 5404f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5405f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5406f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5407f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If previous was a bracket group, we may have to replicate it in certain 5408f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cases. Note that at this point we can encounter only the "basic" bracket 5409f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich opcodes such as BRA and CBRA, as this is the place where they get converted 5410f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich into the more special varieties such as BRAPOS and SBRA. A test for >= 5411f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_ASSERT and <= OP_COND includes ASSERT, ASSERT_NOT, ASSERTBACK, 5412f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ASSERTBACK_NOT, ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND. 
5413f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich Originally, PCRE did not allow repetition of assertions, but now it does, 5414f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for Perl compatibility. */ 5415f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5416f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (*previous >= OP_ASSERT && *previous <= OP_COND) 5417f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5418f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich register int i; 5419f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int len = (int)(code - previous); 542053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *bralink = NULL; 542153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *brazeroptr = NULL; 5422f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 542353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Repeating a DEFINE group (or any group where the condition is always 542453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis FALSE and there is only one branch) is pointless, but Perl allows the 542553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis syntax, so we just ignore the repeat. */ 5426f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 542753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE && 542853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis previous[GET(previous, 1)] != OP_ALT) 5429f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto END_REPEAT; 5430f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5431f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* There is no sense in actually repeating assertions. The only potential 5432f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich use of repetition is in cases when the assertion is optional. 
Therefore, 5433f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if the minimum is greater than zero, just ignore the repeat. If the 5434f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich maximum is not zero or one, set it to 1. */ 5435f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5436f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*previous < OP_ONCE) /* Assertion */ 5437f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5438f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_min > 0) goto END_REPEAT; 5439f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_max < 0 || repeat_max > 1) repeat_max = 1; 5440f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5441f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5442f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* The case of a zero minimum is special because of the need to stick 5443f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_BRAZERO in front of it, and because the group appears once in the 5444f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich data, whereas in other cases it appears the minimum number of times. For 5445f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich this reason, it is simplest to treat this case separately, as otherwise 5446f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich the code gets far too messy. There are several special subcases when the 5447f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich minimum is zero. 
*/ 5448f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5449f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_min == 0) 5450f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5451f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If the maximum is also zero, we used to just omit the group from the 5452f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich output altogether, like this: 5453f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5454f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ** if (repeat_max == 0) 5455f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ** { 5456f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ** code = previous; 5457f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ** goto END_REPEAT; 5458f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ** } 5459f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5460f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich However, that fails when a group or a subgroup within it is referenced 5461f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich as a subroutine from elsewhere in the pattern, so now we stick in 5462f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OP_SKIPZERO in front of it so that it is skipped on execution. As we 5463f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich don't have a list of which groups are referenced, we cannot do this 5464f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich selectively. 5465f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5466f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich If the maximum is 1 or unlimited, we just have to stick in the BRAZERO 546753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis and do no more at this point. 
*/ 5468f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5469f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_max <= 1) /* Covers 0, 1, and unlimited */ 5470f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 547153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memmove(previous + 1, previous, CU2BYTES(len)); 5472f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code++; 5473f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_max == 0) 5474f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5475f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *previous++ = OP_SKIPZERO; 5476f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto END_REPEAT; 5477f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5478f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich brazeroptr = previous; /* Save for possessive optimizing */ 5479f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *previous++ = OP_BRAZERO + repeat_type; 5480f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5481f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5482f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If the maximum is greater than 1 and limited, we have to replicate 5483f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich in a nested fashion, sticking OP_BRAZERO before each set of brackets. 5484f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich The first one has to be handled carefully because it's the original 5485f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich copy, which has to be moved up. The remainder can be handled by code 5486f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich that is common with the non-zero minimum case below. We have to 548753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis adjust the value of repeat_max, since one less copy is required. 
*/ 5488f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5489f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 5490f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5491f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int offset; 549253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len)); 5493f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += 2 + LINK_SIZE; 5494f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *previous++ = OP_BRAZERO + repeat_type; 5495f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *previous++ = OP_BRA; 5496f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5497f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* We chain together the bracket offset fields that have to be 5498f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich filled in later when the ends of the brackets are reached. */ 5499f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5500f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich offset = (bralink == NULL)? 0 : (int)(previous - bralink); 5501f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bralink = previous; 5502f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUTINC(previous, 0, offset); 5503f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5504f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5505f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeat_max--; 5506f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5507f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5508f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If the minimum is greater than zero, replicate the group as many 5509f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich times as necessary, and adjust the maximum to the number of subsequent 551053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis copies that we need. 
*/ 5511f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5512f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 5513f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5514f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_min > 1) 5515f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5516f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* In the pre-compile phase, we don't actually do the replication. We 5517f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich just adjust the length as if we had. Do some paranoid checks for 5518f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit 5519f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich integer type when available, otherwise double. */ 5520f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5521f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lengthptr != NULL) 5522f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 552353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis size_t delta = (repeat_min - 1)*length_prevgroup; 5524f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if ((INT64_OR_DOUBLE)(repeat_min - 1)* 5525f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich (INT64_OR_DOUBLE)length_prevgroup > 5526f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich (INT64_OR_DOUBLE)INT_MAX || 5527f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OFLOW_MAX - *lengthptr < delta) 5528f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5529f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR20; 5530f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 5531f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5532f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *lengthptr += delta; 5533f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5534f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 
5535f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* This is compiling for real. If there is a set first byte for 553653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the group, and we have not yet set a "required byte", set it. */ 5537f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5538f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 5539f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 554053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (groupsetfirstcu && reqcuflags < 0) 5541f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 554253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcu = firstcu; 554353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuflags = firstcuflags; 5544f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5545f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (i = 1; i < repeat_min; i++) 5546f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 554753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memcpy(code, previous, CU2BYTES(len)); 5548f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += len; 5549f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5550f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5551f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5552f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5553f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_max > 0) repeat_max -= repeat_min; 5554f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5555f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5556f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* This code is common to both the zero and non-zero minimum cases. If 5557f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich the maximum is limited, it replicates the group in a nested fashion, 5558f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich remembering the bracket starts on a stack. 
In the case of a zero minimum, 5559f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich the first one was set up above. In all cases the repeat_max now specifies 5560f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich the number of additional copies needed. Again, we must remember to 5561f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich replicate entries on the forward reference list. */ 5562f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5563f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_max >= 0) 5564f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5565f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* In the pre-compile phase, we don't actually do the replication. We 5566f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich just adjust the length as if we had. For each repetition we must add 1 5567f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich to the length for BRAZERO and for all but the last repetition we must 5568f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some 5569f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is 5570f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich a 64-bit integer type when available, otherwise double. 
*/ 5571f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5572f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lengthptr != NULL && repeat_max > 0) 5573f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 557453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis size_t delta = repeat_max*(length_prevgroup + 1 + 2 + 2*LINK_SIZE) - 5575f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 2 - 2*LINK_SIZE; /* Last one doesn't nest */ 5576f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if ((INT64_OR_DOUBLE)repeat_max * 5577f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE) 5578f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich > (INT64_OR_DOUBLE)INT_MAX || 5579f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OFLOW_MAX - *lengthptr < delta) 5580f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5581f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR20; 5582f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 5583f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5584f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *lengthptr += delta; 5585f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5586f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5587f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* This is compiling for real */ 5588f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5589f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else for (i = repeat_max - 1; i >= 0; i--) 5590f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5591f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_BRAZERO + repeat_type; 5592f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5593f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* All but the final copy start a new nesting, maintaining the 5594f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich chain of brackets 
outstanding. */ 5595f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5596f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (i != 0) 5597f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5598f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int offset; 5599f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_BRA; 5600f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich offset = (bralink == NULL)? 0 : (int)(code - bralink); 5601f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bralink = code; 5602f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUTINC(code, 0, offset); 5603f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5604f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 560553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memcpy(code, previous, CU2BYTES(len)); 5606f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += len; 5607f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5608f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5609f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Now chain through the pending brackets, and fill in their length 5610f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich fields (which are holding the chain links pro tem). */ 5611f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5612f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich while (bralink != NULL) 5613f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5614f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int oldlinkoffset; 5615f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int offset = (int)(code - bralink + 1); 561653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *bra = code - offset; 5617f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich oldlinkoffset = GET(bra, 1); 5618f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bralink = (oldlinkoffset == 0)? 
NULL : bralink - oldlinkoffset; 5619f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_KET; 5620f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUTINC(code, 0, offset); 5621f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT(bra, 1, offset); 5622f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5623f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5624f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5625f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If the maximum is unlimited, set a repeater in the final copy. For 5626f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ONCE brackets, that's all we need to do. However, possessively repeated 5627f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ONCE brackets can be converted into non-capturing brackets, as the 5628f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to 5629f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich deal with possessive ONCEs specially. 5630f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5631f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich Otherwise, when we are doing the actual compile phase, check to see 5632f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich whether this group is one that could match an empty string. If so, 5633f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so 5634f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich that runtime checking can be done. [This check is also applied to ONCE 5635f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich groups at runtime, but in a different way.] 
5636f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5637f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich Then, if the quantifier was possessive and the bracket is not a 5638f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich conditional, we convert the BRA code to the POS form, and the KET code to 5639f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich KETRPOS. (It turns out to be convenient at runtime to detect this kind of 5640f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich subpattern at both the start and at the end.) The use of special opcodes 564153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis makes it possible to reduce greatly the stack usage in pcre2_match(). If 5642f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO. 5643f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5644f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich Then, if the minimum number of matches is 1 or 0, cancel the possessive 5645f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich flag so that the default action below, of wrapping everything inside 5646f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich atomic brackets, does not happen. When the minimum is greater than 1, 5647f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich there will be earlier copies of the group, and so we still have to wrap 5648f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich the whole thing. 
*/ 5649f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5650f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 5651f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 565253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *ketcode = code - 1 - LINK_SIZE; 565353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *bracode = ketcode - GET(ketcode, 1); 5654f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5655f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Convert possessive ONCE brackets to non-capturing */ 5656f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5657f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) && 5658f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich possessive_quantifier) *bracode = OP_BRA; 5659f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5660f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* For non-possessive ONCE brackets, all we need to do is to 5661f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich set the KET. */ 5662f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5663f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC) 5664f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *ketcode = OP_KETRMAX + repeat_type; 5665f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5666f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle non-ONCE brackets and possessive ONCEs (which have been 5667f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich converted to non-capturing above). 
*/ 5668f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5669f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 5670f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 567153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* In the compile phase, check whether the group could match an empty 567253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis string. */ 5673f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5674f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lengthptr == NULL) 5675f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 567653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *scode = bracode; 5677f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich do 5678f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 567953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int count = 0; 568053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int rc = could_be_empty_branch(scode, ketcode, utf, cb, FALSE, 568153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis NULL, &count); 568253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (rc < 0) 568353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 568453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR86; 568553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 568653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 568753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (rc > 0) 5688f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5689f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *bracode += OP_SBRA - OP_BRA; 5690f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 5691f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5692f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich scode += GET(scode, 1); 5693f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5694f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich while (*scode == OP_ALT); 5695f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 569653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* A conditional group with only one branch has an implicit empty 569753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis alternative branch. */ 56980ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 569953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*bracode == OP_COND && bracode[GET(bracode,1)] != OP_ALT) 570053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *bracode = OP_SCOND; 570153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 57020ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 5703f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle possessive quantifiers. */ 5704f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5705f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (possessive_quantifier) 5706f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5707f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* For COND brackets, we wrap the whole thing in a possessively 5708f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeated non-capturing bracket, because we have not invented POS 570953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis versions of the COND opcodes. 
*/ 5710f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5711f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*bracode == OP_COND || *bracode == OP_SCOND) 5712f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5713f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int nlen = (int)(code - bracode); 571453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen)); 5715f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += 1 + LINK_SIZE; 5716f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich nlen += 1 + LINK_SIZE; 57170ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS; 5718f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_KETRPOS; 5719f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUTINC(code, 0, nlen); 5720f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT(bracode, 1, nlen); 5721f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5722f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5723f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* For non-COND brackets, we modify the BRA code and use KETRPOS. */ 5724f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5725f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 5726f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5727f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *bracode += 1; /* Switch to xxxPOS opcodes */ 5728f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *ketcode = OP_KETRPOS; 5729f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5730f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5731f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If the minimum is zero, mark it as possessive, then unset the 5732f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich possessive flag when the minimum is 0 or 1. 
*/ 5733f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5734f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO; 5735f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repeat_min < 2) possessive_quantifier = FALSE; 5736f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5737f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5738f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Non-possessive quantifier */ 5739f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5740f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else *ketcode = OP_KETRMAX + repeat_type; 5741f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5742f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5743f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5744f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 574553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If previous is OP_FAIL, it was generated by an empty class [] 574653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (PCRE2_ALLOW_EMPTY_CLASS is set). The other ways in which OP_FAIL can be 574753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis generated, that is by (*FAIL) or (?!), set previous to NULL, which gives a 574853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis "nothing to repeat" error above. We can just ignore the repeat in empty 574953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis class case. 
*/ 5750f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5751f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (*previous == OP_FAIL) goto END_REPEAT; 5752f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5753f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Else there's some kind of shambles */ 5754f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5755f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 5756f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 575753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR10; 5758f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 5759f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5760f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5761f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If the character following a repeat is '+', possessive_quantifier is 5762f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich TRUE. For some opcodes, there are special alternative opcodes for this 5763f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case. For anything else, we wrap the entire repeated item inside OP_ONCE 5764f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich brackets. Logically, the '+' notation is just syntactic sugar, taken from 5765f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich Sun's Java package, but the special opcodes can optimize it. 5766f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5767f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich Some (but not all) possessively repeated subpatterns have already been 5768f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich completely handled in the code just above. For them, possessive_quantifier 5769f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich is always FALSE at this stage. 
Note that the repeated item starts at 5770f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich tempcode, not at previous, which might be the first part of a string whose 5771f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich (former) last char we repeated. */ 5772f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5773f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (possessive_quantifier) 5774f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5775f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int len; 5776f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5777f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Possessifying an EXACT quantifier has no effect, so we can ignore it. 5778f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich However, QUERY, STAR, or UPTO may follow (for quantifiers such as {5,6}, 5779f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich {5,}, or {5,10}). We skip over an EXACT item; if the length of what 5780f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich remains is greater than zero, there's a further opcode that can be 5781f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich handled. If not, do nothing, leaving the EXACT alone. */ 5782f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5783f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich switch(*tempcode) 5784f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5785f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_TYPEEXACT: 5786f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich tempcode += PRIV(OP_lengths)[*tempcode] + 5787f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ((tempcode[1 + IMM2_SIZE] == OP_PROP 5788f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 
2 : 0); 5789f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 5790f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5791f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* CHAR opcodes are used for exacts whose count is 1. */ 5792f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5793f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CHAR: 5794f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CHARI: 5795f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOT: 5796f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTI: 5797f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_EXACT: 5798f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_EXACTI: 5799f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTEXACT: 5800f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NOTEXACTI: 5801f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich tempcode += PRIV(OP_lengths)[*tempcode]; 580253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 5803f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (utf && HAS_EXTRALEN(tempcode[-1])) 5804f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich tempcode += GET_EXTRALEN(tempcode[-1]); 5805f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 5806f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 5807f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5808f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* For the class opcodes, the repeat operator appears at the end; 5809f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich adjust tempcode to point to it. 
*/ 5810f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5811f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CLASS: 5812f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_NCLASS: 581353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis tempcode += 1 + 32/sizeof(PCRE2_UCHAR); 5814f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 5815f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 581653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS 5817f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_XCLASS: 5818f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich tempcode += GET(tempcode, 1); 5819f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 5820f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 5821f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5822f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5823f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If tempcode is equal to code (which points to the end of the repeated 5824f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich item), it means we have skipped an EXACT item but there is no following 5825f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich QUERY, STAR, or UPTO; the value of len will be 0, and we do nothing. In 5826f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich all other cases, tempcode will be pointing to the repeat opcode, and will 5827f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich be less than code, so the value of len will be greater than 0. 
*/ 5828f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5829f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich len = (int)(code - tempcode); 5830f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (len > 0) 5831f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5832f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich unsigned int repcode = *tempcode; 5833f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5834f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* There is a table for possessifying opcodes, all of which are less 5835f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich than OP_CALLOUT. A zero entry means there is no possessified version. 5836f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich */ 5837f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5838f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0) 5839f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *tempcode = opcode_possessify[repcode]; 5840f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5841f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* For opcode without a special possessified version, wrap the item in 584253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ONCE brackets. 
*/ 5843f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5844f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 5845f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 584653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len)); 5847f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += 1 + LINK_SIZE; 5848f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich len += 1 + LINK_SIZE; 5849f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich tempcode[0] = OP_ONCE; 5850f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_KET; 5851f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUTINC(code, 0, len); 5852f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT(tempcode, 1, len); 5853f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5854f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5855f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5856f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5857f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* In all case we no longer have a previous item. We also set the 585853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis "follows varying string" flag for subsequently encountered reqcus if 5859f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich it isn't already set and we have just passed a varying length item. 
*/ 5860f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5861f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich END_REPEAT: 5862f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous = NULL; 586353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->req_varyopt |= reqvary; 5864f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 5865f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5866f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5867f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ===================================================================*/ 586853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Start of nested parenthesized sub-expression, or lookahead or lookbehind 586953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis or option setting or condition or all the other extended parenthesis forms. 587053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis We must save the current high-water-mark for the forward reference list so 587153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis that we know where they start for this group. However, because the list may 587253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis be extended when there are very many forward references (usually the result 587353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis of a replicated inner group), we must use an offset rather than an absolute 587453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis address. Note that (?# comments are dealt with at the top of the loop; 587553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis they do not get this far. 
*/ 5876f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5877f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_LEFT_PARENTHESIS: 58788366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes ptr++; 5879f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 588053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Deal with various "verbs" that can be introduced by '*'. */ 5881f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5882f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':' 588353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis || (MAX_255(ptr[1]) && ((cb->ctypes[ptr[1]] & ctype_letter) != 0)))) 5884f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5885f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int i, namelen; 5886f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int arglen = 0; 5887f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich const char *vn = verbnames; 588853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR name = ptr + 1; 588953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR arg = NULL; 5890f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous = NULL; 5891f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 589253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 589353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Increment ptr, set namelen, check length */ 589453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 589553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis READ_NAME(ctype_letter, ERR60, *errorcodeptr); 5896f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5897f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* It appears that Perl allows any characters whatsoever, other than 5898f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich a closing parenthesis, to appear in arguments, so we no longer insist on 
589953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis letters, digits, and underscores. Perl does not, however, do any 590053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis interpretation within arguments, and has no means of including a closing 590153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis parenthesis. PCRE supports escape processing but only when it is 590253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis requested by an option. Note that check_escape() will not return values 590353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis greater than the code unit maximum when not in UTF mode. */ 5904f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5905f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*ptr == CHAR_COLON) 5906f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5907f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich arg = ++ptr; 590853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 590953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_ALT_VERBNAMES) == 0) 5910f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 591153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis arglen = 0; 591253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) 591353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 591453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; /* Check length as we go */ 591553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis arglen++; /* along, to avoid the */ 591653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((unsigned int)arglen > MAX_MARK) /* possibility of overflow. 
*/ 591753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 591853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR76; 591953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 592053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 592153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 592253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 592353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 592453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 592553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* The length check is in process_verb_names() */ 592653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis arglen = process_verb_name(&ptr, NULL, errorcodeptr, options, 592753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis utf, cb); 592853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (arglen < 0) goto FAILED; 5929f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5930f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5931f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5932f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*ptr != CHAR_RIGHT_PARENTHESIS) 5933f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5934f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR60; 5935f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 5936f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5937f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5938f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Scan the table of verb names */ 5939f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5940f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (i = 0; i < verbcount; i++) 5941f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5942f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (namelen == verbs[i].len && 
594353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PRIV(strncmp_c8)(name, vn, namelen) == 0) 5944f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5945f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int setverb; 5946f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5947f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Check for open captures before ACCEPT and convert it to 5948f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ASSERT_ACCEPT if in an assertion. */ 5949f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5950f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (verbs[i].op == OP_ACCEPT) 5951f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5952f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich open_capitem *oc; 5953f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (arglen != 0) 5954f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5955f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR59; 5956f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 5957f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 595853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->had_accept = TRUE; 59598b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 59608b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis /* In the first pass, just accumulate the length required; 59618b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis otherwise hitting (*ACCEPT) inside many nested parentheses can 59628b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis cause workspace overflow. 
*/ 59638b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis 596453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (oc = cb->open_caps; oc != NULL; oc = oc->next) 5965f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 59668b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis if (lengthptr != NULL) 59678b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis { 59688b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis *lengthptr += CU2BYTES(1) + IMM2_SIZE; 59698b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis } 59708b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis else 59718b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis { 59728b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis *code++ = OP_CLOSE; 59738b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis PUT2INC(code, 0, oc->number); 59748b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis } 5975f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5976f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich setverb = *code++ = 597753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (cb->assert_depth > 0)? 
OP_ASSERT_ACCEPT : OP_ACCEPT; 5978f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 597953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Do not set firstcu after *ACCEPT */ 598053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; 5981f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5982f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 5983f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle other cases with/without an argument */ 5984f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 598553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (arglen == 0) /* There is no argument */ 5986f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5987f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (verbs[i].op < 0) /* Argument is mandatory */ 5988f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5989f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR66; 5990f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 5991f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5992f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich setverb = *code++ = verbs[i].op; 5993f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 5994f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 599553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else /* An argument is present */ 5996f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 599753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (verbs[i].op_arg < 0) /* Argument is forbidden */ 5998f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 5999f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR59; 6000f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6001f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6002f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich setverb = *code++ = 
verbs[i].op_arg; 600353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 600453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Arguments can be very long, especially in 16- and 32-bit modes, 600553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis and can overflow the workspace in the first pass. Instead of 600653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis putting the argument into memory, we just update the length counter 600753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis and set up an empty argument. */ 600853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 600953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (lengthptr != NULL) 601053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 601153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *lengthptr += arglen; 60120ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes *code++ = 0; 60130ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes } 60140ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes else 60150ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes { 60160ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes *code++ = arglen; 601753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_ALT_VERBNAMES) != 0) 601853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 601953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *memcode = code; /* code is "register" */ 602053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (void)process_verb_name(&arg, &memcode, errorcodeptr, options, 602153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis utf, cb); 602253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code = memcode; 602353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 602453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else /* No argument processing */ 602553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 
602653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memcpy(code, arg, CU2BYTES(arglen)); 602753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code += arglen; 602853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 60290ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes } 603053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 6031f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = 0; 6032f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6033f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6034f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich switch (setverb) 6035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6036f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_THEN: 6037f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_THEN_ARG: 603853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->external_flags |= PCRE2_HASTHEN; 6039f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 6040f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6041f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_PRUNE: 6042f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_PRUNE_ARG: 6043f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_SKIP: 6044f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_SKIP_ARG: 604553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->had_pruneorskip = TRUE; 6046f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 6047f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6048f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6049f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; /* Found verb, exit loop */ 6050f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6051f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich vn += verbs[i].len + 1; 6053f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich } 6054f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6055f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (i < verbcount) continue; /* Successfully handled a verb */ 6056f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR60; /* Verb not recognized */ 6057f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6058f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6059f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 606053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Initialization for "real" parentheses */ 60618366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes 60628366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes newoptions = options; 606353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis skipunits = 0; 60648366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes bravalue = OP_CBRA; 60658366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes reset_bracount = FALSE; 60668366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes 6067f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Deal with the extended parentheses; all are introduced by '?', and the 6068f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich appearance of any of them means that this is not a capturing group. 
*/ 6069f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 60708366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes if (*ptr == CHAR_QUESTION_MARK) 6071f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 607253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int i, count; 607353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int namelen; /* Must be signed */ 607453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t index; 607553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t set, unset, *optset; 607653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis named_group *ng; 607753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR name; 607853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *slot; 6079f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6080f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich switch (*(++ptr)) 6081f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6082f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ------------------------------------------------------------ */ 6083f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_VERTICAL_LINE: /* Reset capture count for each branch */ 6084f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich reset_bracount = TRUE; 6085f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Fall through */ 6086f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6087f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ------------------------------------------------------------ */ 6088f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_COLON: /* Non-capturing bracket */ 6089f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bravalue = OP_BRA; 6090f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 6091f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 6092f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 
6093f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ------------------------------------------------------------ */ 6094f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_LEFT_PARENTHESIS: 6095f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bravalue = OP_COND; /* Conditional group */ 6096f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich tempptr = ptr; 6097f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6098f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* A condition can be an assertion, a number (referring to a numbered 6099f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich group's having been set), a name (referring to a named group), or 'R', 6100f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich referring to recursion. R<digits> and R&name are also permitted for 6101f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich recursion tests. 6102f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6103f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich There are ways of testing a named group: (?(name)) is used by Python; 6104f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich Perl 5.10 onwards uses (?(<name>) or (?('name')). 6105f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6106f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich There is one unfortunate ambiguity, caused by history. 'R' can be the 6107f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich recursive thing or the name 'R' (and similarly for 'R' followed by 6108f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich digits). We look for a name first; if not found, we try the other case. 6109f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6110f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich For compatibility with auto-callouts, we allow a callout to be 6111f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich specified before a condition that is an assertion. 
First, check for the 6112f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich syntax of a callout; if found, adjust the temporary pointer that is 6113f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich used to check for an assertion condition. That's all that is needed! */ 6114f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6115f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C) 6116f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 611753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (IS_DIGIT(ptr[3]) || ptr[3] == CHAR_RIGHT_PARENTHESIS) 611853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 611953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break; 612053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[i] == CHAR_RIGHT_PARENTHESIS) 612153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis tempptr += i + 1; 612253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 612353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 612453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 612553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t delimiter = 0; 612653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) 612753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 612853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[3] == PRIV(callout_start_delims)[i]) 612953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 613053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis delimiter = PRIV(callout_end_delims)[i]; 613153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 613253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 613353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 613453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (delimiter != 0) 
613553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 613653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 4; ptr + i < cb->end_pattern; i++) 613753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 613853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[i] == delimiter) 613953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 614053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[i+1] == delimiter) i++; 614153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 614253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 614353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[i+1] == CHAR_RIGHT_PARENTHESIS) tempptr += i + 2; 614453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 614553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 614653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 614753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 614853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 614953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 615053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 615153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* tempptr should now be pointing to the opening parenthesis of the 615253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis assertion condition. 
*/ 615353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 615453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*tempptr != CHAR_LEFT_PARENTHESIS) 615553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 615653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR28; 615753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 615853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 6159f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6160f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6161f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* For conditions that are assertions, check the syntax, and then exit 616253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the switch. This will take control down to where bracketed groups 616353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis are processed. The assertion will be handled as part of the group, 616453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis but we need to identify this case because the conditional assertion may 616553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis not be quantified. 
*/ 6166f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6167f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (tempptr[1] == CHAR_QUESTION_MARK && 6168f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich (tempptr[2] == CHAR_EQUALS_SIGN || 6169f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich tempptr[2] == CHAR_EXCLAMATION_MARK || 61708366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes (tempptr[2] == CHAR_LESS_THAN_SIGN && 61718366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes (tempptr[3] == CHAR_EQUALS_SIGN || 61728366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes tempptr[3] == CHAR_EXCLAMATION_MARK)))) 61738366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes { 617453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->iscondassert = TRUE; 6175f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 61768366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes } 6177f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6178f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all 6179f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich need to skip at least 1+IMM2_SIZE bytes at the start of the group. 
*/ 6180f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6181f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code[1+LINK_SIZE] = OP_CREF; 618253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis skipunits = 1+IMM2_SIZE; 6183f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich refsign = -1; /* => not a number */ 6184f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich namelen = -1; /* => not a name; must set to avoid warning */ 6185f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich name = NULL; /* Always set to avoid warning */ 6186f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich recno = 0; /* Always set to avoid warning */ 6187f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 618853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Point at character after (?( */ 6189f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6190f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 619153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 619253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect 619353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis users of PCRE2 via an application can discover which release of PCRE2 619453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis is being used. 
*/ 619553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 619653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 && 619753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr[7] != CHAR_RIGHT_PARENTHESIS) 619853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 619953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis BOOL ge = FALSE; 620053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int major = 0; 620153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int minor = 0; 620253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 620353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += 7; 620453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr == CHAR_GREATER_THAN_SIGN) 620553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 620653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ge = TRUE; 620753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 620853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 620953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 621053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT 621153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis references its argument twice. 
*/ 621253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 621353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr))) 621453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 621553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR79; 621653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 621753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 621853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 621953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (IS_DIGIT(*ptr)) major = major * 10 + *ptr++ - '0'; 622053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr == CHAR_DOT) 622153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 622253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 622353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0'; 622453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (minor < 10) minor *= 10; 622553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 622653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 622753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr != CHAR_RIGHT_PARENTHESIS || minor > 99) 622853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 622953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR79; 623053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 623153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 623253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 623353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ge) 623453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) || 623553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (PCRE2_MAJOR == major && PCRE2_MINOR >= minor))? 
623653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis OP_TRUE : OP_FALSE; 623753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 623853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)? 623953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis OP_TRUE : OP_FALSE; 624053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 624153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 624253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis skipunits = 1; 624353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; /* End of condition processing */ 624453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 624553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 624653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Check for a test for recursion in a named group. */ 624753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 6248f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND) 6249f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6250f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich terminator = -1; 6251f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr += 2; 6252f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code[1+LINK_SIZE] = OP_RREF; /* Change the type of test */ 6253f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6254f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6255f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Check for a test for a named group's having been set, using the Perl 6256f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich syntax (?(<name>) or (?('name'), and also allow for the original PCRE 6257f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). 
*/ 6258f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6259f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (*ptr == CHAR_LESS_THAN_SIGN) 6260f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6261f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich terminator = CHAR_GREATER_THAN_SIGN; 6262f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 6263f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6264f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (*ptr == CHAR_APOSTROPHE) 6265f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6266f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich terminator = CHAR_APOSTROPHE; 6267f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 6268f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6269f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 6270f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6271f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich terminator = CHAR_NULL; 6272f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*ptr == CHAR_MINUS || *ptr == CHAR_PLUS) refsign = *ptr++; 6273f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (IS_DIGIT(*ptr)) refsign = 0; 6274f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6275f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6276f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle a number */ 6277f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6278f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (refsign >= 0) 6279f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6280f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich while (IS_DIGIT(*ptr)) 6281f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 62820ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes if (recno > INT_MAX / 10 - 1) /* Integer overflow */ 62830ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes { 
62840ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes while (IS_DIGIT(*ptr)) ptr++; 62850ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes *errorcodeptr = ERR61; 62860ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes goto FAILED; 62870ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes } 6288f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich recno = recno * 10 + (int)(*ptr - CHAR_0); 6289f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 6290f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6291f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6292f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6293f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Otherwise we expect to read a name; anything else is an error. When 629453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the referenced name is one of a number of duplicates, a different 629553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis opcode is used and it needs more memory. Unfortunately we cannot tell 629653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis whether this is the case in the first pass, so we have to allow for 629753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis more memory always. In the second pass, the addition to skipunits 629853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis happens later. 
*/ 6299f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6300f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 6301f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6302f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (IS_DIGIT(*ptr)) 6303f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 630453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR44; /* Group name must start with non-digit */ 6305f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6306f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 630753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!MAX_255(*ptr) || (cb->ctypes[*ptr] & ctype_word) == 0) 6308f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6309f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR28; /* Assertion expected */ 6310f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6311f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 631253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis name = ptr; 631353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Increment ptr, set namelen, check length */ 631453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis READ_NAME(ctype_word, ERR48, *errorcodeptr); 631553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (lengthptr != NULL) skipunits += IMM2_SIZE; 6316f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6317f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6318f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Check the terminator */ 6319f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 632053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((terminator > 0 && *ptr++ != (PCRE2_UCHAR)terminator) || 6321f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *ptr++ != CHAR_RIGHT_PARENTHESIS) 6322f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 
6323f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr--; /* Error offset */ 6324f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR26; /* Malformed number or name */ 6325f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6326f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6327f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6328f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Do no further checking in the pre-compile phase. */ 6329f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6330f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lengthptr != NULL) break; 6331f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6332f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* In the real compile we do the work of looking for the actual 6333f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich reference. If refsign is not negative, it means we have a number in 6334f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich recno. */ 6335f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6336f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (refsign >= 0) 6337f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6338f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (recno <= 0) 6339f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6340f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR35; 6341f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6342f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6343f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (refsign != 0) recno = (refsign == CHAR_MINUS)? 
634453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (cb->bracount + 1) - recno : recno + cb->bracount; 634553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (recno <= 0 || (uint32_t)recno > cb->final_bracount) 6346f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6347f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR15; 6348f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6349f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6350f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT2(code, 2+LINK_SIZE, recno); 635153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno; 6352f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 6353f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6354f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6355f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Otherwise look for the name. */ 6356f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 635753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis slot = cb->name_table; 635853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 0; i < cb->names_found; i++) 6359f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 636053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) == 0) break; 636153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis slot += cb->name_entry_size; 6362f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6363f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6364f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Found the named subpattern. If the name is duplicated, add one to 6365f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich the opcode to change CREF/RREF into DNCREF/DNRREF and insert 6366f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich appropriate data values. 
Otherwise, just insert the unique subpattern 6367f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich number. */ 6368f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 636953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (i < cb->names_found) 6370f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 637153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int offset = i; /* Offset of first name found */ 637253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 637353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis count = 0; 637453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (;;) 6375f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 637653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recno = GET2(slot, 0); /* Number for last found */ 637753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno; 6378f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich count++; 637953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (++i >= cb->names_found) break; 638053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis slot += cb->name_entry_size; 638153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) != 0 || 638253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (slot+IMM2_SIZE)[namelen] != 0) break; 6383f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6384f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6385f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (count > 1) 6386f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6387f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT2(code, 2+LINK_SIZE, offset); 6388f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count); 638953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis skipunits += IMM2_SIZE; 6390f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich code[1+LINK_SIZE]++; 6391f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6392f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else /* Not a duplicated name */ 6393f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6394f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT2(code, 2+LINK_SIZE, recno); 6395f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6396f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6397f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6398f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If terminator == CHAR_NULL it means that the name followed directly 6399f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich after the opening parenthesis [e.g. (?(abc)...] and in this case there 6400f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich are some further alternatives to try. For the cases where terminator != 6401f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_NULL [things like (?(<name>... or (?('name')... or (?(R&name)... ] 6402f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich we have now checked all the possibilities, so give an error. */ 6403f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6404f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (terminator != CHAR_NULL) 6405f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6406f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR15; 6407f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6408f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6409f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6410f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Check for (?(R) for recursion. Allow digits after R to specify a 6411f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich specific group number. 
*/ 6412f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6413f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (*name == CHAR_R) 6414f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6415f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich recno = 0; 6416f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (i = 1; i < namelen; i++) 6417f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6418f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (!IS_DIGIT(name[i])) 6419f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 642053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR15; /* Non-existent subpattern */ 6421f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6422f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 64230ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes if (recno > INT_MAX / 10 - 1) /* Integer overflow */ 64240ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes { 64250ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes *errorcodeptr = ERR61; 64260ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes goto FAILED; 64270ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes } 6428f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich recno = recno * 10 + name[i] - CHAR_0; 6429f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6430f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (recno == 0) recno = RREF_ANY; 6431f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code[1+LINK_SIZE] = OP_RREF; /* Change test type */ 6432f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT2(code, 2+LINK_SIZE, recno); 6433f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6434f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6435f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Similarly, check for the (?(DEFINE) "condition", which is always 643653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis false. 
During compilation we set OP_DEFINE to distinguish this from 643753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis other OP_FALSE conditions so that it can be checked for having only one 643853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis branch, but after that the opcode is changed to OP_FALSE. */ 6439f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 644053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0) 6441f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 644253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code[1+LINK_SIZE] = OP_DEFINE; 644353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis skipunits = 1; 6444f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6445f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6446f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Reference to an unidentified subpattern. */ 6447f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6448f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 6449f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6450f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR15; 6451f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6452f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6453f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 6454f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6455f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6456f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ------------------------------------------------------------ */ 6457f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_EQUALS_SIGN: /* Positive lookahead */ 6458f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bravalue = OP_ASSERT; 645953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->assert_depth += 1; 6460f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich ptr++; 6461f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 6462f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6463f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Optimize (?!) to (*FAIL) unless it is quantified - which is a weird 6464f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich thing to do, but Perl allows all assertions to be quantified, and when 6465f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich they contain capturing parentheses there may be a potential use for 6466f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich this feature. Not that that applies to a quantified (?!) but we allow 6467f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich it for uniformity. */ 6468f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6469f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ------------------------------------------------------------ */ 6470f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_EXCLAMATION_MARK: /* Negative lookahead */ 6471f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 6472f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*ptr == CHAR_RIGHT_PARENTHESIS && ptr[1] != CHAR_ASTERISK && 6473f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr[1] != CHAR_PLUS && ptr[1] != CHAR_QUESTION_MARK && 6474f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich (ptr[1] != CHAR_LEFT_CURLY_BRACKET || !is_counted_repeat(ptr+2))) 6475f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6476f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_FAIL; 6477f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous = NULL; 6478f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich continue; 6479f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6480f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bravalue = OP_ASSERT_NOT; 648153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->assert_depth += 1; 
6482f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 6483f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6484f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6485f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ------------------------------------------------------------ */ 6486f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_LESS_THAN_SIGN: /* Lookbehind or named define */ 6487f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich switch (ptr[1]) 6488f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6489f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_EQUALS_SIGN: /* Positive lookbehind */ 6490f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bravalue = OP_ASSERTBACK; 649153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->assert_depth += 1; 6492f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr += 2; 6493f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 6494f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6495f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_EXCLAMATION_MARK: /* Negative lookbehind */ 6496f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bravalue = OP_ASSERTBACK_NOT; 649753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->assert_depth += 1; 6498f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr += 2; 6499f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 6500f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 650153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Must be a name definition - as the syntax was checked in the 650253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis pre-pass, we can assume here that it is valid. Skip over the name 650353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis and go to handle the numbered group. 
*/ 650453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 650553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis default: 650653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (*(++ptr) != CHAR_GREATER_THAN_SIGN); 650753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 650853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto NUMBERED_GROUP; 6509f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6510f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 6511f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6512f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6513f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ------------------------------------------------------------ */ 6514f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_GREATER_THAN_SIGN: /* One-time brackets */ 6515f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bravalue = OP_ONCE; 6516f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 6517f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 6518f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6519f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6520f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ------------------------------------------------------------ */ 652153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_C: /* Callout */ 6522f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous_callout = code; /* Save for later completion */ 6523f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich after_manual_callout = 1; /* Skip one item before completing */ 652453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; /* Character after (?C */ 6525f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 652653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* A callout may have a string argument, delimited by one of a fixed 652753e448c1797913dbc12dbacc341559934566d6bcJanis 
Danisevskis number of characters, or an undelimited numerical argument, or no 652853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis argument, which is the same as (?C0). Different opcodes are used for 652953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the two cases. */ 6530f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 653153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr != CHAR_RIGHT_PARENTHESIS && !IS_DIGIT(*ptr)) 6532f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 653353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t delimiter = 0; 6534f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 653553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) 6536f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 653753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr == PRIV(callout_start_delims)[i]) 653853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 653953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis delimiter = PRIV(callout_end_delims)[i]; 654053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 654153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 6542f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6543f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 654453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (delimiter == 0) 6545f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 654653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR82; 6547f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6548f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6549f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 655053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* During the pre-compile phase, we parse the string and update the 655153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis length. 
There is no need to generate any code. (In fact, the string 655253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis has already been parsed in the pre-pass that looks for named 655353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis parentheses, but it does no harm to leave this code in.) */ 655453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 655553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (lengthptr != NULL) /* Only check the string */ 6556f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 655753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR start = ptr; 655853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis do 6559f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 656053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (++ptr >= cb->end_pattern) 656153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 656253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR81; 656353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr = start; /* To give a more useful message */ 656453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 656553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 656653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2; 6567f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 656853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (ptr[0] != delimiter); 656953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 657053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Start points to the opening delimiter, ptr points to the 657153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis closing delimiter. We must allow for including the delimiter and 657253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for the terminating zero. 
Any doubled delimiters within the string 657353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis make this an overestimate, but it is not worth bothering about. */ 657453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 657553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (*lengthptr) += (ptr - start) + 2 + (1 + 4*LINK_SIZE); 6576f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6577f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 657853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* In the real compile we can copy the string, knowing that it is 657953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis syntactically OK. The starting delimiter is included so that the 658053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis client can discover it if they want. We also pass the start offset to 658153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis help a script language give better error messages. */ 6582f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 658353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 6584f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 658553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *callout_string = code + (1 + 4*LINK_SIZE); 658653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *callout_string++ = *ptr++; 658753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PUT(code, 1 + 3*LINK_SIZE, (int)(ptr - cb->start_pattern)); /* Start offset */ 658853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for(;;) 6589f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 659053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr == delimiter) 6591f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 659253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[1] == delimiter) ptr++; else break; 6593f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 
659453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *callout_string++ = *ptr++; 6595f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 659653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *callout_string++ = CHAR_NULL; 659753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code[0] = OP_CALLOUT_STR; 659853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PUT(code, 1, (int)(ptr + 2 - cb->start_pattern)); /* Next offset */ 659953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PUT(code, 1 + LINK_SIZE, 0); /* Default length */ 660053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PUT(code, 1 + 2*LINK_SIZE, /* Compute size */ 660153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (int)(callout_string - code)); 660253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code = callout_string; 660353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 660453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 660553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Advance to what should be the closing parenthesis, which is 660653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis checked below. */ 660753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 660853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 660953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 661053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 661153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Handle a callout with an optional numerical argument, which must be 661253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis less than or equal to 255. A missing argument gives 0. 
*/ 661353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 661453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 661553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 661653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int n = 0; 661753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code[0] = OP_CALLOUT; /* Numerical callout */ 661853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (IS_DIGIT(*ptr)) 661953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 662053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis n = n * 10 + *ptr++ - CHAR_0; 662153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (n > 255) 6622f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 662353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR38; 6624f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6625f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6626f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 662753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PUT(code, 1, (int)(ptr - cb->start_pattern + 1)); /* Next offset */ 662853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PUT(code, 1 + LINK_SIZE, 0); /* Default length */ 662953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code[1 + 2*LINK_SIZE] = n; /* Callout number */ 663053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code += PRIV(OP_lengths)[OP_CALLOUT]; 663153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 6632f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 663353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Both formats must have a closing parenthesis */ 6634f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 663553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr != CHAR_RIGHT_PARENTHESIS) 663653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 663753e448c1797913dbc12dbacc341559934566d6bcJanis 
Danisevskis *errorcodeptr = ERR39; 663853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 663953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 6640f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 664153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Callouts cannot be quantified. */ 6642f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 664353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis previous = NULL; 664453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis continue; 6645f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 664653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 664753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* ------------------------------------------------------------ */ 664853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_P: /* Python-style named subpattern handling */ 664953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*(++ptr) == CHAR_EQUALS_SIGN || 665053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *ptr == CHAR_GREATER_THAN_SIGN) /* Reference or recursion */ 665153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 665253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis is_recurse = *ptr == CHAR_GREATER_THAN_SIGN; 665353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis terminator = CHAR_RIGHT_PARENTHESIS; 665453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto NAMED_REF_OR_RECURSE; 665553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 665653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (*ptr != CHAR_LESS_THAN_SIGN) /* Test for Python-style defn */ 665753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 665853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR41; 665953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 6660f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 
666153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Fall through to handle (?P< as (?< is handled */ 666253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 666353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 666453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* ------------------------------------------------------------ */ 666553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_APOSTROPHE: /* Define a name - note fall through above */ 666653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 666753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* The syntax was checked and the list of names was set up in the 666853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis pre-pass, so there is nothing to be done now except to skip over the 666953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis name. */ 6670f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 667153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis terminator = (*ptr == CHAR_LESS_THAN_SIGN)? 
667253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; 667353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (*(++ptr) != (unsigned int)terminator); 667453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr++; 667553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto NUMBERED_GROUP; /* Set up numbered group */ 6676f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6677f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6678f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ------------------------------------------------------------ */ 6679f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_AMPERSAND: /* Perl recursion/subroutine syntax */ 6680f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich terminator = CHAR_RIGHT_PARENTHESIS; 6681f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich is_recurse = TRUE; 6682f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Fall through */ 6683f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6684f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* We come here from the Python syntax above that handles both 6685f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich references (?P=name) and recursion (?P>name), as well as falling 6686f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich through from the Perl recursion syntax (?&name). We also come here from 6687f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich the Perl \k<name> or \k'name' back reference syntax and the \k{name} 6688f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich .NET syntax, and the Oniguruma \g<...> and \g'...' subroutine syntax. 
*/ 6689f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6690f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich NAMED_REF_OR_RECURSE: 6691f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich name = ++ptr; 6692f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (IS_DIGIT(*ptr)) 6693f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 669453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR44; /* Group name must start with non-digit */ 6695f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6696f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 669753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Increment ptr, set namelen, check length */ 669853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis READ_NAME(ctype_word, ERR48, *errorcodeptr); 6699f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 670053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* In the pre-compile phase, do a syntax check. 
*/ 6701f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6702f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lengthptr != NULL) 6703f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6704f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (namelen == 0) 6705f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6706f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR62; 6707f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6708f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 670953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr != (PCRE2_UCHAR)terminator) 6710f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6711f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR42; 6712f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6713f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 671453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 6715f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 671653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Scan the list of names generated in the pre-pass in order to get 671753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis a number and whether or not this name is duplicated. 
*/ 67180ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 671953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recno = 0; 672053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis is_dupname = FALSE; 672153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ng = cb->named_groups; 67220ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 672353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 0; i < cb->names_found; i++, ng++) 672453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 672553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (namelen == ng->length && 672653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PRIV(strncmp)(name, ng->name, namelen) == 0) 672753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 672853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis open_capitem *oc; 672953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis is_dupname = ng->isdup; 673053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recno = ng->number; 67310ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 673253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* For a recursion, that's all that is needed. We can now go to the 673353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code that handles numerical recursion. */ 67340ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 673553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (is_recurse) goto HANDLE_RECURSION; 67360ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 673753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* For a back reference, update the back reference map and the 673853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis maximum back reference. 
Then for each group we must check to see if 673953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis it is recursive, that is, it is inside the group that it 674053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis references. A flag is set so that the group can be made atomic. */ 67410ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 674253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->backref_map |= (recno < 32)? (1u << recno) : 1; 674353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno; 67440ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 674553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (oc = cb->open_caps; oc != NULL; oc = oc->next) 67460ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes { 674753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (oc->number == recno) 67480ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes { 674953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis oc->flag = TRUE; 675053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 67510ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes } 67520ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes } 67530ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes } 6754f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6755f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 675653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If the name was not found we have a bad reference. 
*/ 6757f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 675853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (recno == 0) 6759f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 676053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR15; 676153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 6762f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6763f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 676453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If a back reference name is not duplicated, we can handle it as a 676553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis numerical reference. */ 6766f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 676753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!is_dupname) goto HANDLE_REFERENCE; 6768f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 676953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If a back reference name is duplicated, we generate a different 677053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis opcode to a numerical back reference. In the second pass we must search 677153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for the index and count in the final name table. 
*/ 6772f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 677353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis count = 0; 677453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis index = 0; 6775f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 677653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (lengthptr == NULL) 677753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 677853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis slot = cb->name_table; 677953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 0; i < cb->names_found; i++) 6780f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 678153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) == 0 && 678253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis slot[IMM2_SIZE+namelen] == 0) 6783f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 678453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (count == 0) index = i; 678553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis count++; 6786f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 678753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis slot += cb->name_entry_size; 678853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 6789f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 679053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (count == 0) 679153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 679253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR15; 679353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 6794f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6795f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6796f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 679753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; 
679853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis previous = code; 679953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *code++ = ((options & PCRE2_CASELESS) != 0)? OP_DNREFI : OP_DNREF; 680053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PUT2INC(code, 0, index); 680153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PUT2INC(code, 0, count); 680253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis continue; /* End of back ref handling */ 6803f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6804f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6805f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ------------------------------------------------------------ */ 68060ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes case CHAR_R: /* Recursion, same as (?0) */ 68070ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes recno = 0; 68080ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes if (*(++ptr) != CHAR_RIGHT_PARENTHESIS) 68090ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes { 68100ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes *errorcodeptr = ERR29; 68110ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes goto FAILED; 68120ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes } 68130ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes goto HANDLE_RECURSION; 6814f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6815f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6816f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ------------------------------------------------------------ */ 6817f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_MINUS: case CHAR_PLUS: /* Recursion or subroutine */ 6818f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: 6819f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: 
6820f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6821f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich terminator = CHAR_RIGHT_PARENTHESIS; 6822f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6823f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Come here from the \g<...> and \g'...' code (Oniguruma 6824f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich compatibility). However, the syntax has been checked to ensure that 6825f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich the ... are a (signed) number, so that neither ERR63 nor ERR29 will 6826f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich be called on this path, nor with the jump to OTHER_CHAR_AFTER_QUERY 6827f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ever be taken. */ 6828f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6829f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich HANDLE_NUMERICAL_RECURSION: 6830f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6831f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if ((refsign = *ptr) == CHAR_PLUS) 6832f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6833f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 6834f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (!IS_DIGIT(*ptr)) 6835f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6836f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR63; 6837f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6838f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6839f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6840f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (refsign == CHAR_MINUS) 6841f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6842f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (!IS_DIGIT(ptr[1])) 6843f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto OTHER_CHAR_AFTER_QUERY; 
6844f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 6845f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6846f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6847f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich recno = 0; 684853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (IS_DIGIT(*ptr)) 68490ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes { 68500ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes if (recno > INT_MAX / 10 - 1) /* Integer overflow */ 68510ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes { 68520ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes while (IS_DIGIT(*ptr)) ptr++; 68530ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes *errorcodeptr = ERR61; 68540ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes goto FAILED; 68550ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes } 6856f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich recno = recno * 10 + *ptr++ - CHAR_0; 68570ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes } 6858f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 685953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*ptr != (PCRE2_UCHAR)terminator) 6860f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6861f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR29; 6862f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6863f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6864f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6865f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (refsign == CHAR_MINUS) 6866f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6867f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (recno == 0) 6868f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6869f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR58; 6870f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 
6871f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 687253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recno = (int)(cb->bracount + 1) - recno; 6873f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (recno <= 0) 6874f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6875f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR15; 6876f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6877f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6878f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6879f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (refsign == CHAR_PLUS) 6880f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 688153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (recno == 0) 6882f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 688353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR58; 6884f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6885f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 688653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recno += cb->bracount; 688753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 688853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 688953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((uint32_t)recno > cb->final_bracount) 689053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 689153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR15; 689253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 6893f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6894f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 689553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Come here from code above that handles a named recursion. 689653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis We insert the number of the called group after OP_RECURSE. 
At the 689753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis end of compiling the pattern is scanned and these numbers are 689853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis replaced by offsets within the pattern. It is done like this to avoid 689953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis problems with forward references and adjusting offsets when groups 690053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis are duplicated and moved (as discovered in previous implementations). 690153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis Note that a recursion does not have a set first character (relevant 690253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if it is repeated, because it will then be wrapped with ONCE 690353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis brackets). */ 6904f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 690553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis HANDLE_RECURSION: 690653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis previous = code; 6907f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code = OP_RECURSE; 690853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PUT(code, 1, recno); 6909f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += 1 + LINK_SIZE; 691053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis groupsetfirstcu = FALSE; 691153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->had_recurse = TRUE; 6912f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6913f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6914f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Can't determine a first byte now */ 6915f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 691653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; 6917f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich continue; 
6918f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6919f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6920f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ------------------------------------------------------------ */ 6921f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich default: /* Other characters: check option setting */ 6922f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich OTHER_CHAR_AFTER_QUERY: 6923f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich set = unset = 0; 6924f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich optset = &set; 6925f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6926f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON) 6927f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6928f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich switch (*ptr++) 6929f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6930f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_MINUS: optset = &unset; break; 6931f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6932f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_J: /* Record that it changed in the external options */ 693353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *optset |= PCRE2_DUPNAMES; 693453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->external_flags |= PCRE2_JCHANGED; 6935f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 6936f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 693753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_i: *optset |= PCRE2_CASELESS; break; 693853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_m: *optset |= PCRE2_MULTILINE; break; 693953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_s: *optset |= PCRE2_DOTALL; break; 694053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_x: *optset |= PCRE2_EXTENDED; break; 
694153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case CHAR_U: *optset |= PCRE2_UNGREEDY; break; 6942f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 694353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis default: *errorcodeptr = ERR11; 6944f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr--; /* Correct the offset */ 6945f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 6946f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6947f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6948f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6949f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Set up the changed option bits, but don't change anything yet. */ 6950f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6951f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich newoptions = (options | set) & (~unset); 6952f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6953f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If the options ended with ')' this is not the start of a nested 695453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis group with option changes, so the options change at this level. They 695553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis must also be passed back for use in subsequent branches. Reset the 695653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis greedy defaults and the case value for firstcu and reqcu. 
*/ 6957f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6958f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*ptr == CHAR_RIGHT_PARENTHESIS) 6959f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6960f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *optionsptr = options = newoptions; 696153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis greedy_default = ((newoptions & PCRE2_UNGREEDY) != 0); 696253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis greedy_non_default = greedy_default ^ 1; 696353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis req_caseopt = ((newoptions & PCRE2_CASELESS) != 0)? REQ_CASELESS:0; 6964f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous = NULL; /* This item can't be repeated */ 6965f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich continue; /* It is complete */ 6966f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6967f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6968f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If the options ended with ':' we are heading into a nested group 6969f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich with possible change of options. Such groups are non-capturing and are 6970f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich not assertions of any kind. All we need to do is skip over the ':'; 6971f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich the newoptions value is handled below. */ 6972f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6973f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bravalue = OP_BRA; 6974f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 6975f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } /* End of switch for character following (? */ 6976f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } /* End of (? 
handling */ 6977f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 697853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Opening parenthesis not followed by '*' or '?'. If PCRE2_NO_AUTO_CAPTURE 6979f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich is set, all unadorned brackets become non-capturing and behave like (?:...) 6980f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich brackets. */ 6981f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 698253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) 6983f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6984f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bravalue = OP_BRA; 6985f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6986f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6987f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Else we have a capturing group. */ 6988f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6989f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 6990f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 6991f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich NUMBERED_GROUP: 699253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->bracount += 1; 699353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PUT2(code, 1+LINK_SIZE, cb->bracount); 699453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis skipunits = IMM2_SIZE; 6995f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 6996f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 6997f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Process nested bracketed regex. First check for parentheses nested too 6998f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich deeply. 
*/ 6999f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 700053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((cb->parens_depth += 1) > (int)(cb->cx->parens_nest_limit)) 7001f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 700253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR19; 7003f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 7004f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7005f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 70068366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes /* All assertions used not to be repeatable, but this was changed for Perl 70078366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes compatibility. All kinds can now be repeated except for assertions that are 70088366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes conditions (Perl also forbids these to be repeated). We copy code into a 7009f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich non-register variable (tempcode) in order to be able to pass its address 70108366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes because some compilers complain otherwise. At the start of a conditional 701153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis group whose condition is an assertion, cb->iscondassert is set. We unset it 70128366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes here so as to allow assertions later in the group to be quantified. 
*/ 70138366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes 70148366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT && 701553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->iscondassert) 70168366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes { 70178366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes previous = NULL; 701853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->iscondassert = FALSE; 70198366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes } 70200ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes else 70210ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes { 70220ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes previous = code; 70230ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes } 7024f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7025f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code = bravalue; 7026f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich tempcode = code; 702753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis tempreqvary = cb->req_varyopt; /* Save value before bracket */ 702853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis tempbracount = cb->bracount; /* Save value before bracket */ 7029f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich length_prevgroup = 0; /* Initialize for pre-compile phase */ 7030f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7031f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (!compile_regex( 7032f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich newoptions, /* The complete new option state */ 7033f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich &tempcode, /* Where to put code (updated) */ 7034f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich &ptr, /* Input pointer (updated) */ 7035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich errorcodeptr, /* Where to put an error message */ 7036f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich (bravalue == OP_ASSERTBACK || 7037f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ 7038f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich reset_bracount, /* True if (?| group */ 703953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis skipunits, /* Skip over bracket number */ 7040f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cond_depth + 7041f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ((bravalue == OP_COND)?1:0), /* Depth of condition subpatterns */ 704253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis &subfirstcu, /* For possible first char */ 704353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis &subfirstcuflags, 704453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis &subreqcu, /* For possible last char */ 704553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis &subreqcuflags, 7046f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bcptr, /* Current branch chain */ 704753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb, /* Compile data block */ 7048f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich (lengthptr == NULL)? NULL : /* Actual compile phase */ 7049f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich &length_prevgroup /* Pre-compile phase */ 7050f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich )) 7051f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 7052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 705353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->parens_depth -= 1; 7054f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7055f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If this was an atomic group and there are no capturing groups within it, 7056f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich generate OP_ONCE_NC instead of OP_ONCE. 
*/ 7057f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 705853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (bravalue == OP_ONCE && cb->bracount <= tempbracount) 7059f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code = OP_ONCE_NC; 7060f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7061f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT) 706253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->assert_depth -= 1; 7063f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7064f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* At the end of compiling, code is still pointing to the start of the 7065f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich group, while tempcode has been updated to point past the end of the group. 7066f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich The pattern pointer (ptr) is on the bracket. 7067f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7068f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich If this is a conditional bracket, check that there are no more than 7069f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich two branches in the group, or just one if it's a DEFINE group. We do this 7070f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich in the real compile phase, not in the pre-pass, where the whole group may 7071f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich not be available. 
*/ 7072f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7073f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (bravalue == OP_COND && lengthptr == NULL) 7074f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 707553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *tc = code; 7076f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int condcount = 0; 7077f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7078f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich do { 7079f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich condcount++; 7080f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich tc += GET(tc,1); 7081f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7082f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich while (*tc != OP_KET); 7083f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7084f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* A DEFINE group is never obeyed inline (the "condition" is always 708553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis false). It must have only one branch. Having checked this, change the 708653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis opcode to OP_FALSE. 
*/ 7087f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 708853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (code[LINK_SIZE+1] == OP_DEFINE) 7089f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7090f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (condcount > 1) 7091f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7092f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR54; 7093f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 7094f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 709553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code[LINK_SIZE+1] = OP_FALSE; 709653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis bravalue = OP_DEFINE; /* Just a flag to suppress char handling below */ 7097f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7098f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7099f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* A "normal" conditional group. If there is just one branch, we must not 710053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis make use of its firstcu or reqcu, because this is equivalent to an 7101f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich empty second branch. 
*/ 7102f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7103f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 7104f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7105f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (condcount > 2) 7106f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7107f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR27; 7108f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 7109f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 711053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (condcount == 1) subfirstcuflags = subreqcuflags = REQ_NONE; 7111f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7112f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7113f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 71148b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis /* At the end of a group, it's an error if we hit end of pattern or 71158b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis any non-closing parenthesis. This check also happens in the pre-scan, 71168b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis so should not trigger here, but leave this code as an insurance. */ 7117f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7118f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*ptr != CHAR_RIGHT_PARENTHESIS) 7119f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7120f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR14; 7121f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 7122f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7123f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7124f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* In the pre-compile phase, update the length by the length of the group, 7125f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich less the brackets at either end. 
Then reduce the compiled code to just a 7126f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich set of non-capturing brackets so that it doesn't use much memory if it is 7127f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich duplicated by a quantifier.*/ 7128f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7129f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lengthptr != NULL) 7130f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7131f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE) 7132f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7133f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR20; 7134f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 7135f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7136f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE; 7137f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code++; /* This already contains bravalue */ 7138f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUTINC(code, 0, 1 + LINK_SIZE); 7139f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_KET; 7140f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUTINC(code, 0, 1 + LINK_SIZE); 7141f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; /* No need to waste time with special character handling */ 7142f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7143f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7144f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Otherwise update the main code pointer to the end of the group. 
*/ 7145f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7146f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code = tempcode; 7147f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7148f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* For a DEFINE group, required and first character settings are not 7149f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich relevant. */ 7150f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 715153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (bravalue == OP_DEFINE) break; 7152f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7153f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle updating of the required and first characters for other types of 7154f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich group. Update for normal brackets of all kinds, and conditions with two 7155f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich branches (see code above). If the bracket is followed by a quantifier with 715653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zero repeat, we have to back off. Hence the definition of zeroreqcu and 715753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcu outside the main loop so that they can be accessed for the 7158f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich back off. 
*/ 7159f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 716053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zeroreqcu = reqcu; 716153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zeroreqcuflags = reqcuflags; 716253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcu = firstcu; 716353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcuflags = firstcuflags; 716453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis groupsetfirstcu = FALSE; 7165f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7166f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (bravalue >= OP_ONCE) 7167f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 716853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If we have not yet set a firstcu in this branch, take it from the 7169f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich subpattern, remembering that it was set here so that a repeat of more 717053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis than one can replicate it as reqcu if necessary. If the subpattern has 717153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis no firstcu, set "none" for the whole branch. In both cases, a zero 717253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis repeat forces firstcu to "none". 
*/ 7173f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 717453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET) 7175f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 717653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (subfirstcuflags >= 0) 7177f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 717853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcu = subfirstcu; 717953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcuflags = subfirstcuflags; 718053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis groupsetfirstcu = TRUE; 7181f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 718253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else firstcuflags = REQ_NONE; 718353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcuflags = REQ_NONE; 7184f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7185f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 718653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If firstcu was previously set, convert the subpattern's firstcu 718753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis into reqcu if there wasn't one, using the vary flag that was in 7188f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich existence beforehand. 
*/ 7189f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 719053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (subfirstcuflags >= 0 && subreqcuflags < 0) 7191f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 719253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis subreqcu = subfirstcu; 719353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis subreqcuflags = subfirstcuflags | tempreqvary; 7194f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7195f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7196f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If the subpattern set a required byte (or set a first byte that isn't 7197f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich really the first byte - see above), set it. */ 7198f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 719953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (subreqcuflags >= 0) 7200f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 720153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcu = subreqcu; 720253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuflags = subreqcuflags; 7203f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7204f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7205f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 720653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* For a forward assertion, we take the reqcu, if set. This can be 7207f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich helpful if the pattern that follows the assertion doesn't set a different 720853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis char. For example, it's useful for /(?=abcde).+/. 
We can't set firstcu 7209f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for an assertion, however because it leads to incorrect effect for patterns 721053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis such as /(?=a)a.+/ when the "real" "a" would then become a reqcu instead 721153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis of a firstcu. This is overcome by a scan at the end if there's no 721253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcu, looking for an asserted first char. */ 7213f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 721453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (bravalue == OP_ASSERT && subreqcuflags >= 0) 7215f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 721653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcu = subreqcu; 721753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuflags = subreqcuflags; 7218f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7219f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; /* End of processing '(' */ 7220f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7221f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7222f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ===================================================================*/ 7223f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle metasequences introduced by \. For ones like \d, the ESC_ values 7224f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich are arranged to be the negation of the corresponding OP_values in the 722553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis default case when PCRE2_UCP is not set. For the back references, the values 7226f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich are negative the reference number. Only back references and those types 7227f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich that consume a character may be repeated. 
We can test for values between 7228f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ESC_b and ESC_Z for the latter; this may have to change if any new ones are 722953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ever created. 723053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 723153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis Note: \Q and \E are handled at the start of the character-processing loop, 723253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis not here. */ 7233f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7234f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case CHAR_BACKSLASH: 7235f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich tempptr = ptr; 723653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis escape = PRIV(check_escape)(&ptr, cb->end_pattern, &ec, errorcodeptr, 723753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis options, FALSE, cb); 7238f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*errorcodeptr != 0) goto FAILED; 7239f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7240f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (escape == 0) /* The escape coded a single character */ 7241f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich c = ec; 7242f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 7243f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7244f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* For metasequences that actually match a character, we disable the 7245f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich setting of a first character if it hasn't already been set. 
*/ 7246f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 724753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags == REQ_UNSET && escape > ESC_b && escape < ESC_Z) 724853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcuflags = REQ_NONE; 7249f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7250f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Set values to reset to if this is followed by a zero repeat. */ 7251f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 725253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcu = firstcu; 725353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcuflags = firstcuflags; 725453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zeroreqcu = reqcu; 725553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zeroreqcuflags = reqcuflags; 7256f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7257f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n' 7258f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich is a subroutine call by number (Oniguruma syntax). In fact, the value 7259f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ESC_g is returned only for these cases. So we don't need to check for < 7260f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich or ' if the value is ESC_g. For the Perl syntax \g{n} the value is 7261f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich -n, and for the Perl syntax \g{name} the result is ESC_k (as 7262f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich that is a synonym for a named back reference). 
*/ 7263f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7264f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (escape == ESC_g) 7265f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 726653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR p; 726753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t cf; 7268f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7269f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? 7270f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; 7271f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7272f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* These two statements stop the compiler for warning about possibly 7273f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In 7274f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich fact, because we do the check for a number below, the paths that 7275f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich would actually be in error are never taken. */ 7276f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 727753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis skipunits = 0; 7278f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich reset_bracount = FALSE; 7279f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7280f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If it's not a signed or unsigned number, treat it as a name. 
*/ 7281f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7282f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cf = ptr[1]; 7283f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (cf != CHAR_PLUS && cf != CHAR_MINUS && !IS_DIGIT(cf)) 7284f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7285f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich is_recurse = TRUE; 7286f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto NAMED_REF_OR_RECURSE; 7287f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7288f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7289f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Signed or unsigned number (cf = ptr[1]) is known to be plus or minus 7290f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich or a digit. */ 7291f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7292f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich p = ptr + 2; 7293f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich while (IS_DIGIT(*p)) p++; 729453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (*p != (PCRE2_UCHAR)terminator) 7295f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7296f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR57; 72970ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes goto FAILED; 7298f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7299f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 7300f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto HANDLE_NUMERICAL_RECURSION; 7301f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7302f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7303f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* \k<name> or \k'name' is a back reference by name (Perl syntax). 7304f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich We also support \k{name} (.NET syntax). 
*/ 7305f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7306f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (escape == ESC_k) 7307f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7308f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if ((ptr[1] != CHAR_LESS_THAN_SIGN && 7309f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET)) 7310f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7311f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR69; 73120ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes goto FAILED; 7313f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7314f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich is_recurse = FALSE; 7315f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? 7316f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)? 7317f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET; 7318f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto NAMED_REF_OR_RECURSE; 7319f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7320f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 732153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Back references are handled specially; must disable firstcu if 7322f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich not set to cope with cases like (?=(\w+))\1: which would otherwise set 7323f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ':' later. 
*/ 7324f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7325f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (escape < 0) 7326f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7327f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich open_capitem *oc; 7328f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich recno = -escape; 7329f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7330f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Come here from named backref handling when the reference is to a 733153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis single group (i.e. not to a duplicated name). */ 7332f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7333f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich HANDLE_REFERENCE: 733453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (recno > (int)cb->final_bracount) 733553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 733653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR15; 733753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 733853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 733953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; 7340f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous = code; 734153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *code++ = ((options & PCRE2_CASELESS) != 0)? OP_REFI : OP_REF; 7342f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT2INC(code, 0, recno); 734353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->backref_map |= (recno < 32)? 
(1u << recno) : 1; 734453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno; 7345f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7346f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Check to see if this back reference is recursive, that it, it 7347f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich is inside the group that it references. A flag is set so that the 7348f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich group can be made atomic. */ 7349f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 735053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (oc = cb->open_caps; oc != NULL; oc = oc->next) 7351f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7352f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (oc->number == recno) 7353f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7354f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich oc->flag = TRUE; 7355f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 7356f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7357f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7358f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7359f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7360f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* So are Unicode property matches, if supported. 
*/ 7361f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 736253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 7363f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (escape == ESC_P || escape == ESC_p) 7364f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7365f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich BOOL negated; 7366f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich unsigned int ptype = 0, pdata = 0; 736753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr, cb)) 7368f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 7369f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous = code; 7370f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP; 7371f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = ptype; 7372f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = pdata; 7373f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7374f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#else 7375f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7376f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If Unicode properties are not supported, \X, \P, and \p are not 7377f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich allowed. 
*/ 7378f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7379f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (escape == ESC_X || escape == ESC_P || escape == ESC_p) 7380f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7381f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR45; 7382f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto FAILED; 7383f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7384f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 7385f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 738653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* The use of \C can be locked out. */ 738753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 738853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef NEVER_BACKSLASH_C 738953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (escape == ESC_C) 739053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 739153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR85; 739253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 739353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 739453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else 739553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (escape == ESC_C && (options & PCRE2_NEVER_BACKSLASH_C) != 0) 739653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 739753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = ERR83; 739853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto FAILED; 739953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 740053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 740153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 7402f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* For the rest (including \X when Unicode properties are supported), we 
7403f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich can obtain the OP value by negating the escape value in the default 740453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis situation when PCRE2_UCP is not set. When it *is* set, we substitute 7405f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich Unicode property tests. Note that \b and \B do a one-character 7406f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich lookbehind, and \A also behaves as if it does. */ 7407f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7408f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 7409f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 741053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */ 7411f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && 741253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->max_lookbehind == 0) 741353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->max_lookbehind = 1; 741453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 7415f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (escape >= ESC_DU && escape <= ESC_wu) 7416f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 741753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->nestptr[1] = cb->nestptr[0]; /* Back up if at 2nd level */ 741853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->nestptr[0] = ptr + 1; /* Where to resume */ 7419f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr = substitutes[escape - ESC_DU] - 1; /* Just before substitute */ 7420f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7421f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 7422f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 74238b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis /* In non-UTF mode, and for both 
32-bit modes, we turn \C into 74248b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis OP_ALLANY instead of OP_ANYBYTE so that it works in DFA mode and in 74258b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis lookbehinds. */ 7426f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7427f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7428f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous = (escape > ESC_b && escape < ESC_Z)? code : NULL; 74298b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 32 74308b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis *code++ = (escape == ESC_C)? OP_ALLANY : escape; 74318b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis#else 7432f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape; 74338b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis#endif 7434f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7435f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7436f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich continue; 7437f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7438f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7439f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* We have a data character whose value is in c. In UTF-8 mode it may have 7440f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich a value > 127. We set its representation in the length/buffer, and then 7441f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich handle it as a data character. 
*/ 7442f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 744353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis mclength = PUTCHAR(c, mcbuffer); 7444f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich goto ONE_CHAR; 7445f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7446f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7447f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* ===================================================================*/ 7448f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Handle a literal character. It is guaranteed not to be whitespace or # 7449f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich when the extended flag is set. If we are in a UTF mode, it may be a 7450f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich multi-unit literal character. */ 7451f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7452f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich default: 7453f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich NORMAL_CHAR: 7454f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich mclength = 1; 7455f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich mcbuffer[0] = c; 7456f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 745753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 7458f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (utf && HAS_EXTRALEN(c)) 7459f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr)); 7460f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 7461f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7462f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* At this point we have the character's bytes in mcbuffer, and the length 746353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis in mclength. When not in UTF mode, the length is always 1. 
*/ 7464f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7465f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ONE_CHAR: 7466f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich previous = code; 7467f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 746853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* For caseless UTF mode, check whether this character has more than one 746953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis other case. If so, generate a special OP_PROP item instead of OP_CHARI. */ 7470f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 747153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE 747253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (utf && (options & PCRE2_CASELESS) != 0) 7473f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7474f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich GETCHAR(c, mcbuffer); 7475f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if ((c = UCD_CASESET(c)) != 0) 7476f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7477f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_PROP; 7478f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = PT_CLIST; 7479f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = c; 748053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags == REQ_UNSET) 748153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcuflags = zerofirstcuflags = REQ_NONE; 7482f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 7483f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7484f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7485f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 7486f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7487f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Caseful matches, or not one of the multicase characters. 
*/ 7488f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 748953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *code++ = ((options & PCRE2_CASELESS) != 0)? OP_CHARI : OP_CHAR; 7490f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (c = 0; c < mclength; c++) *code++ = mcbuffer[c]; 7491f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7492f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Remember if \r or \n were seen */ 7493f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7494f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL) 749553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->external_flags |= PCRE2_HASCRORLF; 7496f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7497f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Set the first and required bytes appropriately. If no previous first 7498f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich byte, set it from this character, but revert to none on a zero repeat. 749953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis Otherwise, leave the firstcu value alone, and don't change it on a zero 7500f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich repeat. 
*/ 7501f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 750253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags == REQ_UNSET) 7503f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 750453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcuflags = REQ_NONE; 750553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zeroreqcu = reqcu; 750653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zeroreqcuflags = reqcuflags; 7507f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 750853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If the character is more than one byte long, we can set firstcu 7509f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich only if it is not to be matched caselessly. */ 7510f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7511f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (mclength == 1 || req_caseopt == 0) 7512f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 751353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcu = mcbuffer[0] | req_caseopt; 751453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcu = mcbuffer[0]; 751553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcuflags = req_caseopt; 7516f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7517f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (mclength != 1) 7518f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 751953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcu = code[-1]; 752053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuflags = cb->req_varyopt; 7521f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7522f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 752353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else firstcuflags = reqcuflags = REQ_NONE; 7524f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7525f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 
752653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* firstcu was previously set; we can set reqcu only if the length is 7527f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 1 or the matching is caseful. */ 7528f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7529f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 7530f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 753153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcu = firstcu; 753253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zerofirstcuflags = firstcuflags; 753353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zeroreqcu = reqcu; 753453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis zeroreqcuflags = reqcuflags; 7535f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (mclength == 1 || req_caseopt == 0) 7536f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 753753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcu = code[-1]; 753853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuflags = req_caseopt | cb->req_varyopt; 7539f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7540f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7541f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7542f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; /* End of literal character handling */ 7543f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7544f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } /* end of big loop */ 7545f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7546f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Control never reaches here by falling through, only by a goto for all the 7547f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevicherror states. Pass back the position in the pattern so that it can be displayed 7548f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichto the user for diagnosing the error. 
*/ 7549f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7550f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichFAILED: 7551f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*ptrptr = ptr; 7552f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichreturn FALSE; 7553f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 7554f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7555f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7556f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7557f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 755853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Compile regex: a sequence of alternatives * 7559f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 7560f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7561f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* On entry, ptr is pointing past the bracket character, but on return it 7562f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichpoints to the closing bracket, or vertical bar, or end of string. The code 7563f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichvariable is pointing at the byte into which the BRA operator has been stored. 7564f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichThis function is used during the pre-compile phase when we are trying to find 7565f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichout the amount of memory needed, as well as during the real compile phase. The 7566f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichvalue of lengthptr distinguishes the two phases. 
7567f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7568f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments: 7569f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich options option bits, including any changes for this subpattern 7570f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich codeptr -> the address of the current code pointer 7571f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptrptr -> the address of the current pattern pointer 7572f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich errorcodeptr -> pointer to error code variable 7573f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich lookbehind TRUE if this is a lookbehind assertion 7574f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich reset_bracount TRUE to reset the count for each branch 757553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis skipunits skip this many code units at start (for brackets and OP_COND) 7576f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cond_depth depth of nesting for conditional subpatterns 757753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcuptr place to put the first required code unit 757853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcuflagsptr place to put the first code unit flags, or a negative number 757953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuptr place to put the last required code unit 758053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuflagsptr place to put the last required code unit flags, or a negative number 7581f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bcptr pointer to the chain of currently open branches 758253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb points to the data block with tables pointers etc. 
7583f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich lengthptr NULL during the real compile phase 7584f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich points to length accumulator during pre-compile phase 7585f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7586f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichReturns: TRUE on success 7587f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 7588f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7589f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic BOOL 759053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompile_regex(uint32_t options, PCRE2_UCHAR **codeptr, PCRE2_SPTR *ptrptr, 759153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, uint32_t skipunits, 759253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int cond_depth, uint32_t *firstcuptr, int32_t *firstcuflagsptr, 759353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr, 759453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis compile_block *cb, size_t *lengthptr) 7595f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 759653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR ptr = *ptrptr; 759753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *code = *codeptr; 759853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *last_branch = code; 759953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *start_bracket = code; 760053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *reverse_count = NULL; 7601f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichopen_capitem capitem; 7602f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichint capnumber = 0; 760353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t firstcu, reqcu; 760453e448c1797913dbc12dbacc341559934566d6bcJanis 
Danisevskisint32_t firstcuflags, reqcuflags; 760553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t branchfirstcu, branchreqcu; 760653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint32_t branchfirstcuflags, branchreqcuflags; 760753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissize_t length; 7608f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichunsigned int orig_bracount; 7609f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichunsigned int max_bracount; 7610f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbranch_chain bc; 7611f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7612f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* If set, call the external function that checks for stack availability. */ 7613f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 761453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (cb->cx->stack_guard != NULL && 761553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->cx->stack_guard(cb->parens_depth, cb->cx->stack_guard_data)) 7616f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 761753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr= ERR33; 7618f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich return FALSE; 7619f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7620f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7621f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Miscellaneous initialization */ 7622f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7623f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbc.outer = bcptr; 7624f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbc.current_branch = code; 7625f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 762653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfirstcu = reqcu = 0; 762753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfirstcuflags = reqcuflags = REQ_UNSET; 76288366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes 
7629f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Accumulate the length for use in the pre-compile phase. Start with the 763053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskislength of the BRA and KET and any extra code units that are required at the 7631f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbeginning. We accumulate in a local variable to save frequent testing of 763253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskislengthptr for NULL. We cannot do this by looking at the value of 'code' at the 7633f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstart and end of each alternative, because compiled items are discarded during 7634f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichthe pre-compile phase so that the work space is not exceeded. */ 7635f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 763653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskislength = 2 + 2*LINK_SIZE + skipunits; 7637f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7638f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* WARNING: If the above line is changed for any reason, you must also change 7639f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichthe code that abstracts option settings at the start of the pattern and makes 7640f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichthem global. It tests the value of length for (2 + 2*LINK_SIZE) in the 764153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispre-compile phase to find out whether or not anything has yet been compiled. 7642f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 764353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisIf this is a capturing subpattern, add to the chain of open capturing items 7644f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichso that we can detect them if (*ACCEPT) is encountered. 
This is also used to 7645f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdetect groups that contain recursive back references to themselves. Note that 7646f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichonly OP_CBRA need be tested here; changing this opcode to one of its variants, 7647f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kraleviche.g. OP_SCBRAPOS, happens later, after the group has been compiled. */ 7648f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7649f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichif (*code == OP_CBRA) 7650f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7651f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich capnumber = GET2(code, 1 + LINK_SIZE); 7652f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich capitem.number = capnumber; 765353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis capitem.next = cb->open_caps; 7654f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich capitem.flag = FALSE; 765553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->open_caps = &capitem; 7656f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7657f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7658f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Offset is set zero to mark that this bracket is still open */ 7659f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7660f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichPUT(code, 1, 0); 766153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscode += 1 + LINK_SIZE + skipunits; 7662f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7663f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Loop for each alternative branch */ 7664f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 766553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisorig_bracount = max_bracount = cb->bracount; 766653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 7667f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichfor (;;) 
7668f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7669f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* For a (?| group, reset the capturing bracket count so that each branch 7670f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich uses the same numbers. */ 7671f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 767253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (reset_bracount) cb->bracount = orig_bracount; 7673f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7674f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Set up dummy OP_REVERSE if lookbehind assertion */ 7675f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7676f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lookbehind) 7677f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7678f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code++ = OP_REVERSE; 7679f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich reverse_count = code; 7680f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUTINC(code, 0, 0); 7681f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich length += 1 + LINK_SIZE; 7682f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7683f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7684f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Now compile the branch; in the pre-compile phase its length gets added 7685f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich into the length. */ 7686f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 768753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstcu, 768853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis &branchfirstcuflags, &branchreqcu, &branchreqcuflags, &bc, 768953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cond_depth, cb, (lengthptr == NULL)? 
NULL : &length)) 7690f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7691f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *ptrptr = ptr; 7692f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich return FALSE; 7693f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7694f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7695f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Keep the highest bracket count in case (?| was used and some branch 7696f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich has fewer than the rest. */ 7697f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 769853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (cb->bracount > max_bracount) max_bracount = cb->bracount; 7699f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7700f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* In the real compile phase, there is some post-processing to be done. */ 7701f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7702f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lengthptr == NULL) 7703f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 770453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If this is the first branch, the firstcu and reqcu values for the 7705f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich branch become the values for the regex. 
*/ 7706f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7707f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*last_branch != OP_ALT) 7708f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 770953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcu = branchfirstcu; 771053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcuflags = branchfirstcuflags; 771153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcu = branchreqcu; 771253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuflags = branchreqcuflags; 7713f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7714f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 771553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If this is not the first branch, the first char and reqcu have to 7716f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich match the values from all the previous branches, except that if the 771753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis previous value for reqcu didn't have REQ_VARY set, it can still match, 7718f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich and we set REQ_VARY for the regex. */ 7719f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7720f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 7721f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 772253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If we previously had a firstcu, but it doesn't match the new branch, 772353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis we have to abandon the firstcu for the regex, but if there was 772453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis previously no reqcu, it takes on the value of the old firstcu. 
*/ 7725f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 772653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags != branchfirstcuflags || firstcu != branchfirstcu) 7727f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 772853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags >= 0) 7729f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 773053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (reqcuflags < 0) 773153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 773253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcu = firstcu; 773353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuflags = firstcuflags; 773453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 7735f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 773653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcuflags = REQ_NONE; 7737f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7738f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 773953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* If we (now or from before) have no firstcu, a firstcu from the 774053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis branch becomes a reqcu if there isn't a branch reqcu. 
*/ 7741f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 774253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags < 0 && branchfirstcuflags >= 0 && 774353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis branchreqcuflags < 0) 7744f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 774553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis branchreqcu = branchfirstcu; 774653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis branchreqcuflags = branchfirstcuflags; 7747f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7748f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 774953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Now ensure that the reqcus match */ 7750f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 775153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (((reqcuflags & ~REQ_VARY) != (branchreqcuflags & ~REQ_VARY)) || 775253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcu != branchreqcu) 775353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuflags = REQ_NONE; 7754f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 7755f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 775653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcu = branchreqcu; 775753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuflags |= branchreqcuflags; /* To "or" REQ_VARY */ 7758f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7759f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7760f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7761f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If lookbehind, check that this branch matches a fixed-length string, and 7762f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich put the length into the OP_REVERSE item. Temporarily mark the end of the 776353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis branch with OP_END. 
If the branch contains OP_RECURSE, the result is 776453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis FFL_LATER (a negative value) because there may be forward references that 776553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis we can't check here. Set a flag to cause another lookbehind check at the 776653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis end. Why not do it all at the end? Because common errors can be picked up 776753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis here and the offset of the problem can be shown. */ 7768f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7769f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lookbehind) 7770f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7771f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int fixed_length; 777253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int count = 0; 7773f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code = OP_END; 777453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis fixed_length = find_fixedlength(last_branch, (options & PCRE2_UTF) != 0, 777553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis FALSE, cb, NULL, &count); 777653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (fixed_length == FFL_LATER) 7777f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 777853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->check_lookbehind = TRUE; 7779f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7780f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (fixed_length < 0) 7781f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 778253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorcodeptr = fixed_length_errors[-fixed_length]; 7783f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *ptrptr = ptr; 7784f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich return FALSE; 7785f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 
7786f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 7787f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 778853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (fixed_length > cb->max_lookbehind) 778953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->max_lookbehind = fixed_length; 7790f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT(reverse_count, 0, fixed_length); 7791f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7792f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7793f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7794f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7795f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Reached end of expression, either ')' or end of pattern. In the real 7796f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich compile phase, go back through the alternative branches and reverse the chain 7797f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich of offsets, with the field in the BRA item now becoming an offset to the 7798f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich first alternative. If there are no alternatives, it points to the end of the 7799f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich group. The length in the terminating ket is always the length of the whole 7800f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bracketed item. Return leaving the pointer at the terminating char. 
*/ 7801f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7802f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*ptr != CHAR_VERTICAL_LINE) 7803f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7804f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lengthptr == NULL) 7805f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 780653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis size_t branch_length = code - last_branch; 7807f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich do 7808f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 780953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis size_t prev_length = GET(last_branch, 1); 7810f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT(last_branch, 1, branch_length); 7811f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich branch_length = prev_length; 7812f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich last_branch -= branch_length; 7813f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7814f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich while (branch_length > 0); 7815f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7816f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7817f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Fill in the ket */ 7818f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7819f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code = OP_KET; 7820f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT(code, 1, (int)(code - start_bracket)); 7821f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += 1 + LINK_SIZE; 7822f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7823f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If it was a capturing subpattern, check to see if it contained any 782453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recursive back references. If so, we must wrap it in atomic brackets. 
In 782553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis any event, remove the block from the chain. */ 7826f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7827f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (capnumber > 0) 7828f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 782953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (cb->open_caps->flag) 7830f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7831f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich memmove(start_bracket + 1 + LINK_SIZE, start_bracket, 783253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis CU2BYTES(code - start_bracket)); 7833f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *start_bracket = OP_ONCE; 7834f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += 1 + LINK_SIZE; 7835f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT(start_bracket, 1, (int)(code - start_bracket)); 7836f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code = OP_KET; 7837f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT(code, 1, (int)(code - start_bracket)); 7838f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += 1 + LINK_SIZE; 7839f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich length += 2 + 2*LINK_SIZE; 7840f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 784153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->open_caps = cb->open_caps->next; 7842f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7843f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7844f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Retain the highest bracket number, in case resetting was used. 
*/ 7845f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 784653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb->bracount = max_bracount; 7847f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7848f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Set values to pass back */ 7849f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7850f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *codeptr = code; 7851f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *ptrptr = ptr; 785253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *firstcuptr = firstcu; 785353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *firstcuflagsptr = firstcuflags; 785453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *reqcuptr = reqcu; 785553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *reqcuflagsptr = reqcuflags; 7856f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lengthptr != NULL) 7857f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7858f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (OFLOW_MAX - *lengthptr < length) 7859f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7860f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *errorcodeptr = ERR20; 7861f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich return FALSE; 7862f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7863f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *lengthptr += length; 7864f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7865f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich return TRUE; 7866f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7867f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7868f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Another branch follows. In the pre-compile phase, we can move the code 7869f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich pointer back to where it was for the start of the first branch. 
(That is, 7870f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich pretend that each branch is the only one.) 7871f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7872f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich In the real compile phase, insert an ALT node. Its length field points back 7873f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich to the previous branch while the bracket remains open. At the end the chain 7874f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich is reversed. It's done like this so that the start of the bracket has a 7875f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich zero offset until it is closed, making it possible to detect recursion. */ 7876f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7877f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (lengthptr != NULL) 7878f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 787953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code = *codeptr + 1 + LINK_SIZE + skipunits; 7880f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich length += 1 + LINK_SIZE; 7881f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7882f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else 7883f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7884f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code = OP_ALT; 7885f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT(code, 1, (int)(code - last_branch)); 7886f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bc.current_branch = last_branch = code; 7887f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += 1 + LINK_SIZE; 7888f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7889f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 789053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Advance past the vertical bar */ 789153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 7892f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr++; 
7893f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7894f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Control never reaches here */ 7895f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 7896f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7897f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7898f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7899f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 790053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Check for anchored pattern * 7901f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 7902f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7903f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Try to find out if this is an anchored regular expression. Consider each 7904f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichalternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket 7905f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichall of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then 7906f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichit's anchored. However, if this is a multiline pattern, then only OP_SOD will 7907f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbe found, because ^ generates OP_CIRCM in that mode. 7908f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7909f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichWe can also consider a regex to be anchored if OP_SOM starts all its branches. 7910f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichThis is the code for \G, which means "match at start of match position, taking 7911f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichinto account the match offset". 
7912f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7913f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichA branch is also implicitly anchored if it starts with .* and DOTALL is set, 7914f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbecause that will try the rest of the pattern at all possible matching points, 7915f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichso there is no point trying again.... er .... 7916f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7917f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich.... except when the .* appears inside capturing parentheses, and there is a 7918f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichsubsequent back reference to those parentheses. We haven't enough information 7919f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichto catch that case precisely. 7920f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7921f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichAt first, the best we could do was to detect when .* was in capturing brackets 7922f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichand the highest back reference was greater than or equal to that level. 7923f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichHowever, by keeping a bitmap of the first 31 back references, we can catch some 7924f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichof the more common cases more precisely. 7925f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7926f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich... A second exception is when the .* appears inside an atomic group, because 7927f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichthis prevents the number of characters it matches from being adjusted. 
7928f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7929f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments: 793053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code points to start of the compiled pattern 7931f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bracket_map a bitmap of which brackets we are inside while testing; this 793253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis handles up to substring 31; after that we just have to take 793353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the less precise approach 793453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb points to the compile data block 7935f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich atomcount atomic group level 7936f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7937f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichReturns: TRUE or FALSE 7938f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 7939f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7940f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic BOOL 794153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisis_anchored(register PCRE2_SPTR code, unsigned int bracket_map, 794253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis compile_block *cb, int atomcount) 7943f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 7944f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdo { 794553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR scode = first_significant_code( 7946f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code + PRIV(OP_lengths)[*code], FALSE); 7947f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich register int op = *scode; 7948f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7949f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Non-capturing brackets */ 7950f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7951f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (op 
== OP_BRA || op == OP_BRAPOS || 7952f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich op == OP_SBRA || op == OP_SBRAPOS) 7953f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 795453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE; 7955f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7956f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7957f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Capturing brackets */ 7958f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7959f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (op == OP_CBRA || op == OP_CBRAPOS || 7960f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich op == OP_SCBRA || op == OP_SCBRAPOS) 7961f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 7962f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int n = GET2(scode, 1+LINK_SIZE); 796353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int new_map = bracket_map | ((n < 32)? 
(1u << n) : 1); 796453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!is_anchored(scode, new_map, cb, atomcount)) return FALSE; 7965f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7966f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7967f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Positive forward assertions and conditions */ 7968f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7969f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (op == OP_ASSERT || op == OP_COND) 7970f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 797153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE; 7972f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7973f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7974f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Atomic groups */ 7975f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7976f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (op == OP_ONCE || op == OP_ONCE_NC) 7977f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 797853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!is_anchored(scode, bracket_map, cb, atomcount + 1)) 7979f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich return FALSE; 7980f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7981f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7982f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and 7983f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich it isn't in brackets that are or may be referenced or inside an atomic 798453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis group. There is also an option that disables auto-anchoring. 
*/ 7985f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7986f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR || 7987f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich op == OP_TYPEPOSSTAR)) 7988f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 798953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (scode[1] != OP_ALLANY || (bracket_map & cb->backref_map) != 0 || 799053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis atomcount > 0 || cb->had_pruneorskip || 799153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) 7992f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich return FALSE; 7993f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 7994f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7995f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Check for explicit anchoring */ 7996f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7997f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE; 7998f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 7999f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += GET(code, 1); 8000f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8001f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichwhile (*code == OP_ALT); /* Loop for each alternative */ 8002f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichreturn TRUE; 8003f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 8004f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8005f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8006f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8007f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 8008f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich* Check for starting with ^ or .* * 
8009f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 8010f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8011f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* This is called to find out if every branch starts with ^ or .* so that 8012f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich"first char" processing can be done to speed things up in multiline 8013f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichmatching and for non-DOTALL patterns that start with .* (which must start at 8014f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichthe beginning or after \n). As in the case of is_anchored() (see above), we 8015f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichhave to take account of back references to capturing brackets that contain .* 8016f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbecause in that case we can't make the assumption. Also, the appearance of .* 8017f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichinside atomic brackets or in a pattern that contains *PRUNE or *SKIP does not 8018f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichcount, because once again the assumption no longer holds. 
8019f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8020f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments: 802153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code points to start of the compiled pattern or a group 8022f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich bracket_map a bitmap of which brackets we are inside while testing; this 802353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis handles up to substring 31; after that we just have to take 802453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis the less precise approach 802553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb points to the compile data 8026f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich atomcount atomic group level 8027f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8028f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichReturns: TRUE or FALSE 8029f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 8030f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8031f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic BOOL 803253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisis_startline(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb, 803353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int atomcount) 8034f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 8035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdo { 803653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR scode = first_significant_code( 8037f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code + PRIV(OP_lengths)[*code], FALSE); 8038f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich register int op = *scode; 8039f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8040f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* If we are at the start of a conditional assertion group, *both* the 8041f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich conditional assertion *and* 
what follows the condition must satisfy the test 8042f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for start of line. Other kinds of condition fail. Note that there may be an 8043f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich auto-callout at the start of a condition. */ 8044f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8045f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (op == OP_COND) 8046f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 8047f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich scode += 1 + LINK_SIZE; 804853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 8049f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT]; 805053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE); 805153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 8052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich switch (*scode) 8053f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 8054f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CREF: 8055f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_DNCREF: 8056f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_RREF: 8057f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_DNRREF: 80588366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes case OP_FAIL: 805953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_FALSE: 806053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case OP_TRUE: 8061f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich return FALSE; 8062f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8063f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich default: /* Assertion */ 806453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!is_startline(scode, bracket_map, cb, atomcount)) return FALSE; 
8065f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich do scode += GET(scode, 1); while (*scode == OP_ALT); 8066f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich scode += 1 + LINK_SIZE; 8067f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 8068f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8069f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich scode = first_significant_code(scode, FALSE); 8070f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich op = *scode; 8071f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8072f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8073f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Non-capturing brackets */ 8074f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8075f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (op == OP_BRA || op == OP_BRAPOS || 8076f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich op == OP_SBRA || op == OP_SBRAPOS) 8077f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 807853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!is_startline(scode, bracket_map, cb, atomcount)) return FALSE; 8079f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8080f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8081f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Capturing brackets */ 8082f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8083f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (op == OP_CBRA || op == OP_CBRAPOS || 8084f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich op == OP_SCBRA || op == OP_SCBRAPOS) 8085f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 8086f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int n = GET2(scode, 1+LINK_SIZE); 808753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int new_map = bracket_map | ((n < 32)? 
(1u << n) : 1); 808853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!is_startline(scode, new_map, cb, atomcount)) return FALSE; 8089f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8090f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8091f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Positive forward assertions */ 8092f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8093f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (op == OP_ASSERT) 8094f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 809553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!is_startline(scode, bracket_map, cb, atomcount)) return FALSE; 8096f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8097f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8098f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Atomic brackets */ 8099f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8100f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (op == OP_ONCE || op == OP_ONCE_NC) 8101f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 810253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!is_startline(scode, bracket_map, cb, atomcount + 1)) return FALSE; 8103f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8104f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8105f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* .* means "start at start or after \n" if it isn't in atomic brackets or 8106f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich brackets that may be referenced, as long as the pattern does not contain 8107f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *PRUNE or *SKIP, because these break the feature. Consider, for example, 8108f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e. not at the 810953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis start of a line. 
There is also an option that disables this optimization. */ 8110f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8111f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR) 8112f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 811353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (scode[1] != OP_ANY || (bracket_map & cb->backref_map) != 0 || 811453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis atomcount > 0 || cb->had_pruneorskip || 811553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0) 8116f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich return FALSE; 8117f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8118f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8119f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Check for explicit circumflex; anything else gives a FALSE result. Note 8120f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich in particular that this includes atomic brackets OP_ONCE and OP_ONCE_NC 8121f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich because the number of characters matched by .* cannot be adjusted inside 8122f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich them. 
*/ 8123f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8124f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (op != OP_CIRC && op != OP_CIRCM) return FALSE; 8125f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8126f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Move on to the next alternative */ 8127f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8128f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += GET(code, 1); 8129f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8130f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichwhile (*code == OP_ALT); /* Loop for each alternative */ 8131f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichreturn TRUE; 8132f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 8133f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8134f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8135f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8136f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 813753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* Check for asserted fixed first code unit * 8138f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 8139f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 814053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* During compilation, the "first code unit" settings from forward assertions 814153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisare discarded, because they can cause conflicts with actual literals that 814253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfollow. However, if we end up without a first code unit setting for an 814353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisunanchored pattern, it is worth scanning the regex to see if there is an 814453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisinitial asserted first code unit. 
If all branches start with the same asserted 814553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscode unit, or with a non-conditional bracket all of whose alternatives start 814653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswith the same asserted code unit (recurse ad lib), then we return that code 814753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisunit, with the flags set to zero or REQ_CASELESS; otherwise return zero with 814853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisREQ_NONE in the flags. 8149f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8150f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments: 815153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis code points to start of compiled pattern 815253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis flags points to the first code unit flags 8153f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich inassert TRUE if in an assertion 8154f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 815553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns: the fixed first code unit, or 0 with REQ_NONE in flags 8156f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 8157f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 815853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic uint32_t 815953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfind_firstassertedcu(PCRE2_SPTR code, int32_t *flags, BOOL inassert) 8160f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 816153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister uint32_t c = 0; 8162f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichint cflags = REQ_NONE; 8163f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8164f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*flags = REQ_NONE; 8165f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdo { 816653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t d; 
8167f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int dflags; 8168f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int xl = (*code == OP_CBRA || *code == OP_SCBRA || 8169f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0; 817053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE); 817153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis register PCRE2_UCHAR op = *scode; 8172f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8173f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich switch(op) 8174f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 8175f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich default: 8176f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich return 0; 8177f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8178f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_BRA: 8179f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_BRAPOS: 8180f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CBRA: 8181f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_SCBRA: 8182f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CBRAPOS: 8183f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_SCBRAPOS: 8184f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_ASSERT: 8185f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_ONCE: 8186f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_ONCE_NC: 818753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis d = find_firstassertedcu(scode, &dflags, op == OP_ASSERT); 8188f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (dflags < 0) 8189f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich return 0; 819053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (cflags < 0) { c = d; cflags = dflags; } 
819153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (c != d || cflags != dflags) return 0; 8192f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 8193f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8194f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_EXACT: 8195f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich scode += IMM2_SIZE; 8196f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Fall through */ 8197f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8198f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CHAR: 8199f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_PLUS: 8200f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_MINPLUS: 8201f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_POSPLUS: 8202f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (!inassert) return 0; 8203f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (cflags < 0) { c = scode[1]; cflags = 0; } 8204f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (c != scode[1]) return 0; 8205f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 8206f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8207f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_EXACTI: 8208f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich scode += IMM2_SIZE; 8209f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Fall through */ 8210f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8211f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_CHARI: 8212f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_PLUSI: 8213f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_MINPLUSI: 8214f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich case OP_POSPLUSI: 8215f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (!inassert) return 0; 8216f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (cflags < 0) { c = scode[1]; 
cflags = REQ_CASELESS; } 8217f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich else if (c != scode[1]) return 0; 8218f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 8219f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8220f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8221f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich code += GET(code, 1); 8222f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8223f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichwhile (*code == OP_ALT); 8224f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8225f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*flags = cflags; 8226f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichreturn c; 8227f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 8228f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8229f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8230f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8231f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 8232f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich* Add an entry to the name/number table * 8233f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 8234f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8235f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* This function is called between compiling passes to add an entry to the 8236f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichname/number table, maintaining alphabetical order. Checking for permitted 8237f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichand forbidden duplicates has already been done. 
8238f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8239f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments: 824053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb the compile data block 8241f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich name the name to add 8242f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich length the length of the name 8243f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich groupno the group number 8244f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8245f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichReturns: nothing 8246f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 8247f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8248f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic void 824953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisadd_name_to_table(compile_block *cb, PCRE2_SPTR name, int length, 8250f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich unsigned int groupno) 8251f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 8252f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichint i; 825353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *slot = cb->name_table; 8254f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 825553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (i = 0; i < cb->names_found; i++) 8256f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 825753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int crc = memcmp(name, slot+IMM2_SIZE, CU2BYTES(length)); 8258f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (crc == 0 && slot[IMM2_SIZE+length] != 0) 8259f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich crc = -1; /* Current name is a substring */ 8260f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8261f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Make space in the table and break the loop for an earlier name. 
For a 8262f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich duplicate or later name, carry on. We do this for duplicates so that in the 8263f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich simple case (when ?(| is not used) they are in order of their numbers. In all 8264f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cases they are in the order in which they appear in the pattern. */ 8265f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8266f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (crc < 0) 8267f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 826853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memmove(slot + cb->name_entry_size, slot, 826953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis CU2BYTES((cb->names_found - i) * cb->name_entry_size)); 8270f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 8271f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8272f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8273f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Continue the loop for a later or duplicate name */ 8274f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 827553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis slot += cb->name_entry_size; 8276f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8277f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8278f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichPUT2(slot, 0, groupno); 827953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismemcpy(slot + IMM2_SIZE, name, CU2BYTES(length)); 828053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb->names_found++; 828153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 828253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Add a terminating zero and fill the rest of the slot with zeroes so that 828353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe memory is all initialized. 
Otherwise valgrind moans about uninitialized 828453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismemory when saving serialized compiled patterns. */ 828553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 828653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismemset(slot + IMM2_SIZE + length, 0, 828753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis CU2BYTES(cb->name_entry_size - length - IMM2_SIZE)); 8288f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 8289f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8290f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8291f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8292f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/************************************************* 829353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis* External function to compile a pattern * 8294f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/ 8295f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 829653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function reads a regular expression in the form of a string and returns 829753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisa pointer to a block of store holding a compiled version of the expression. 
8298f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8299f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments: 8300f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich pattern the regular expression 830153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis patlen the length of the pattern, or PCRE2_ZERO_TERMINATED 830253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis options option bits 830353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorptr pointer to errorcode 830453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis erroroffset pointer to error offset 830553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ccontext points to a compile context or is NULL 8306f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8307f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichReturns: pointer to compiled data block, or NULL on error, 830853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis with errorcode and erroroffset set 8309f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/ 8310f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 831153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION 831253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options, 831353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext) 8314f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{ 831553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL utf; /* Set TRUE for UTF mode */ 831653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispcre2_real_code *re = NULL; /* What we will return */ 831753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompile_block cb; /* "Static" compile-time data */ 831853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisconst uint8_t *tables; /* Char tables base pointer */ 
8319f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 832053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *code; /* Current pointer in compiled code */ 832153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR codestart; /* Start of compiled code */ 832253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR ptr; /* Current pointer in pattern */ 8323f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 832453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissize_t length = 1; /* Allow or final END opcode */ 832553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissize_t usedlength; /* Actual length used */ 832653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissize_t re_blocksize; /* Size of memory block */ 832753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 832853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint32_t firstcuflags, reqcuflags; /* Type of first/req code unit */ 832953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t firstcu, reqcu; /* Value of first/req code unit */ 833053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t setflags = 0; /* NL and BSR set flags */ 833153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 833253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t skipatstart; /* When checking (*UTF) etc */ 833353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t limit_match = UINT32_MAX; /* Unset match limits */ 833453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t limit_recursion = UINT32_MAX; 8335f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 833653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint newline = 0; /* Unset; can be set by the pattern */ 833753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint bsr = 0; /* Unset; can be set by the pattern */ 833853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint errorcode = 0; /* Initialize to avoid 
compiler warn */ 833953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 834053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Comments at the head of this file explain about these variables. */ 834153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 834253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *copied_pattern = NULL; 834353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR stack_copied_pattern[COPIED_PATTERN_SIZE]; 8344f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichnamed_group named_groups[NAMED_GROUP_LIST_SIZE]; 8345f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 834653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* The workspace is used in different ways in the different compiling phases. 834753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisIt needs to be 16-bit aligned for the preliminary group scan, and 32-bit 834853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisaligned for the group information cache. */ 834953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 835053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t c32workspace[C32_WORK_SIZE]; 835153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *cworkspace = (PCRE2_UCHAR *)c32workspace; 835253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 8353f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 835453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* -------------- Check arguments and set up the pattern ----------------- */ 8355f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 835653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* There must be error code and offset pointers. 
*/ 8357f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 835853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (errorptr == NULL || erroroffset == NULL) return NULL; 835953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*errorptr = ERR0; 836053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*erroroffset = 0; 836153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 836253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* There must be a pattern! */ 836353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 836453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (pattern == NULL) 8365f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 836653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorptr = ERR16; 8367f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich return NULL; 8368f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8369f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 837053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Check that all undefined public option bits are zero. 
*/ 8371f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 837253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((options & ~PUBLIC_COMPILE_OPTIONS) != 0) 8373f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 837453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorptr = ERR17; 837553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return NULL; 8376f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8377f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 837853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* A NULL compile context means "use a default context" */ 8379f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 838053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (ccontext == NULL) 838153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ccontext = (pcre2_compile_context *)(&PRIV(default_compile_context)); 8382f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 838353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* A zero-terminated pattern is indicated by the special length value 838453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero, 838553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisto ensure that it is always possible to look one code unit beyond the end of 838653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe pattern's characters. In both cases, check that the pattern is overlong. 
*/ 8387f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 838853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (patlen == PCRE2_ZERO_TERMINATED) 838953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 839053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis patlen = PRIV(strlen)(pattern); 839153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (patlen > ccontext->max_pattern_length) 839253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 839353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorptr = ERR88; 839453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return NULL; 839553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 839653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 839753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiselse 8398f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 839953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (patlen > ccontext->max_pattern_length) 840053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 840153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorptr = ERR88; 840253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return NULL; 840353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 840453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (patlen < COPIED_PATTERN_SIZE) 840553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis copied_pattern = stack_copied_pattern; 840653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else 840753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 840853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis copied_pattern = ccontext->memctl.malloc(CU2BYTES(patlen + 1), 840953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ccontext->memctl.memory_data); 841053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (copied_pattern == NULL) 841153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 
841253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorptr = ERR21; 841353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis return NULL; 841453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 841553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 841653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis memcpy(copied_pattern, pattern, CU2BYTES(patlen)); 841753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis copied_pattern[patlen] = 0; 841853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis pattern = copied_pattern; 8419f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8420f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 842153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* ------------ Initialize the "static" compile data -------------- */ 842253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 842353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskistables = (ccontext->tables != NULL)? ccontext->tables : PRIV(default_tables); 842453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 842553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.lcc = tables + lcc_offset; /* Individual */ 842653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.fcc = tables + fcc_offset; /* character */ 842753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.cbits = tables + cbits_offset; /* tables */ 842853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.ctypes = tables + ctypes_offset; 842953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 843053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.assert_depth = 0; 843153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.bracount = cb.final_bracount = 0; 843253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.cx = ccontext; 843353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.dupnames = FALSE; 843453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.end_pattern = pattern 
+ patlen; 843553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.nestptr[0] = cb.nestptr[1] = NULL; 843653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.external_flags = 0; 843753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.external_options = options; 843853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.groupinfo = c32workspace; 843953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.had_recurse = FALSE; 844053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.iscondassert = FALSE; 844153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.max_lookbehind = 0; 844253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.name_entry_size = 0; 844353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.name_table = NULL; 844453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.named_groups = named_groups; 844553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.named_group_list_size = NAMED_GROUP_LIST_SIZE; 844653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.names_found = 0; 844753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.open_caps = NULL; 844853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.parens_depth = 0; 844953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.req_varyopt = 0; 845053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.start_code = cworkspace; 845153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.start_pattern = pattern; 845253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.start_workspace = cworkspace; 845353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.workspace_size = COMPILE_WORK_SIZE; 845453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 845553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Maximum back reference and backref bitmap. 
The bitmap records up to 31 back 845653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreferences to help in deciding whether (.*) can be treated as anchored or not. 845753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/ 845853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 845953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.top_backref = 0; 846053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.backref_map = 0; 8461f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 846253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* --------------- Start looking at the pattern --------------- */ 8463f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 846453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Check for global one-time option settings at the start of the pattern, and 846553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisremember the offset to the actual regex. */ 8466f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 846753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisptr = pattern; 846853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisskipatstart = 0; 8469f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8470f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichwhile (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && 8471f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich ptr[skipatstart+1] == CHAR_ASTERISK) 8472f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 847353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis unsigned int i; 847453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 0; i < sizeof(pso_list)/sizeof(pso); i++) 847553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 847653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis pso *p = pso_list + i; 8477f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 847853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (PRIV(strncmp_c8)(ptr+skipatstart+2, (char 
*)(p->name), p->length) == 0) 847953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 848053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uint32_t c, pp; 8481f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 848253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis skipatstart += p->length + 2; 848353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis switch(p->type) 848453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 848553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case PSO_OPT: 848653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb.external_options |= p->value; 848753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 8488f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 848953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case PSO_FLG: 849053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis setflags |= p->value; 849153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 8492f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 849353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case PSO_NL: 849453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis newline = p->value; 849553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis setflags |= PCRE2_NL_SET; 849653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 8497f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 849853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case PSO_BSR: 849953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis bsr = p->value; 850053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis setflags |= PCRE2_BSR_SET; 850153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 850253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 850353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case PSO_LIMM: 850453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case PSO_LIMR: 
850553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = 0; 850653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis pp = skipatstart; 850753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (!IS_DIGIT(ptr[pp])) 850853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 850953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR60; 851053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += pp; 851153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto HAD_ERROR; 851253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 851353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis while (IS_DIGIT(ptr[pp])) 851453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 851553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */ 851653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis c = c*10 + (ptr[pp++] - CHAR_0); 851753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 851853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS) 851953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 852053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR60; 852153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ptr += pp; 852253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto HAD_ERROR; 852353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 852453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (p->type == PSO_LIMM) limit_match = c; 852553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else limit_recursion = c; 852653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis skipatstart += pp - skipatstart; 852753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 852853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 852953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; /* Out of the table 
scan loop */ 8530f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8531f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 853253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (i >= sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */ 8533f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8534f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 853553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* End of pattern-start options; advance to start of real regex. */ 8536f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 853753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisptr += skipatstart; 8538f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 853953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */ 854053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 854153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifndef SUPPORT_UNICODE 854253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((cb.external_options & (PCRE2_UTF|PCRE2_UCP)) != 0) 8543f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 8544f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich errorcode = ERR32; 854553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto HAD_ERROR; 8546f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8547f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 8548f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 854953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Check UTF. We have the original options in 'options', with that value as 855053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismodified by (*UTF) etc in cb->external_options. 
*/ 8551f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 855253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisutf = (cb.external_options & PCRE2_UTF) != 0; 855353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (utf) 8554f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 855553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_NEVER_UTF) != 0) 855653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 855753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR74; 855853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto HAD_ERROR; 855953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 856053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((options & PCRE2_NO_UTF_CHECK) == 0 && 856153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (errorcode = PRIV(valid_utf)(pattern, patlen, erroroffset)) != 0) 856253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto HAD_UTF_ERROR; 8563f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8564f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 856553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Check UCP lockout. */ 8566f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 856753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((cb.external_options & (PCRE2_UCP|PCRE2_NEVER_UCP)) == 856853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (PCRE2_UCP|PCRE2_NEVER_UCP)) 8569f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 857053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR75; 857153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto HAD_ERROR; 8572f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8573f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 857453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Process the BSR setting. 
*/ 8575f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 857653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (bsr == 0) bsr = ccontext->bsr_convention; 8577f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 857853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Process the newline setting. */ 857953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 858053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (newline == 0) newline = ccontext->newline_convention; 858153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.nltype = NLTYPE_FIXED; 858253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisswitch(newline) 8583f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 858453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case PCRE2_NEWLINE_CR: 858553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb.nllen = 1; 858653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb.nl[0] = CHAR_CR; 858753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 858853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 858953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case PCRE2_NEWLINE_LF: 859053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb.nllen = 1; 859153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb.nl[0] = CHAR_NL; 859253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 859353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 859453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case PCRE2_NEWLINE_CRLF: 859553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb.nllen = 2; 859653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb.nl[0] = CHAR_CR; 859753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb.nl[1] = CHAR_NL; 859853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 859953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 860053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 
case PCRE2_NEWLINE_ANY: 860153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb.nltype = NLTYPE_ANY; 860253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 860353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 860453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case PCRE2_NEWLINE_ANYCRLF: 860553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb.nltype = NLTYPE_ANYCRLF; 860653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 860753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 860853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis default: 860953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR56; 861053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto HAD_ERROR; 8611f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 861253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 861353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Before we do anything else, do a pre-scan of the pattern in order to 861453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisdiscover the named groups and their numerical equivalents, so that this 861553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisinformation is always available for the remaining processing. */ 861653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 861753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiserrorcode = scan_for_captures(&ptr, cb.external_options, &cb); 861853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (errorcode != 0) goto HAD_ERROR; 861953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 862053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* For obscure debugging this code can be enabled. 
*/ 862153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 862253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if 0 8623f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 862453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int i; 862553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis named_group *ng = cb.named_groups; 862653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis fprintf(stderr, "+++Captures: %d\n", cb.final_bracount); 862753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 0; i < cb.names_found; i++, ng++) 8628f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 862953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis fprintf(stderr, "+++%3d %.*s\n", ng->number, ng->length, ng->name); 8630f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8631f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 863253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 8633f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 863453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Reset current bracket count to zero and current pointer to the start of the 863553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispattern. */ 8636f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 863753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.bracount = 0; 863853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisptr = pattern + skipatstart; 8639f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 864053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Pretend to compile the pattern while actually just accumulating the amount 864153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisof memory required in the 'length' variable. This behaviour is triggered by 864253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispassing a non-NULL final argument to compile_regex(). 
We pass a block of 864353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisworkspace (cworkspace) for it to compile parts of the pattern into; the 864453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompiled code is discarded when it is no longer needed, so hopefully this 864553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisworkspace will never overflow, though there is a test for its doing so. 8646f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 864753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisOn error, errorcode will be set non-zero, so we don't need to look at the 864853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisresult of the function. The initial options have been put into the cb block so 864953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthat they can be changed if an option setting is found within the regex right 865053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisat the beginning. Bringing initial option settings outside can help speed up 865153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstarting point checks. We still have to pass a separate options variable (the 865253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfirst argument) because that may change as the pattern is processed. 
*/ 8653f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8654f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichcode = cworkspace; 8655f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*code = OP_BRA; 8656f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 865753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis(void)compile_regex(cb.external_options, &code, &ptr, &errorcode, FALSE, 865853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis FALSE, 0, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, 865953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis &cb, &length); 8660f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 866153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (errorcode != 0) goto HAD_ERROR; 8662f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichif (length > MAX_PATTERN_SIZE) 8663f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 8664f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich errorcode = ERR20; 866553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto HAD_ERROR; 8666f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8667f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 866853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Compute the size of, and then get and initialize, the data block for storing 866953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe compiled pattern and names table. Integer overflow should no longer be 867053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispossible because nowadays we limit the maximum value of cb.names_found and 867153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.name_entry_size. 
*/ 8672f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 867353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre_blocksize = sizeof(pcre2_real_code) + 867453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis CU2BYTES(length + cb.names_found * cb.name_entry_size); 867553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre = (pcre2_real_code *) 867653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ccontext->memctl.malloc(re_blocksize, ccontext->memctl.memory_data); 8677f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichif (re == NULL) 8678f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 8679f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich errorcode = ERR21; 868053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto HAD_ERROR; 8681f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8682f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 868353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->memctl = ccontext->memctl; 868453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->tables = tables; 868553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->executable_jit = NULL; 868653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismemset(re->start_bitmap, 0, 32 * sizeof(uint8_t)); 868753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->blocksize = re_blocksize; 8688f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichre->magic_number = MAGIC_NUMBER; 868953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->compile_options = options; 869053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->overall_options = cb.external_options; 869153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags; 8692f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichre->limit_match = limit_match; 8693f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichre->limit_recursion = limit_recursion; 
869453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->first_codeunit = 0; 869553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->last_codeunit = 0; 869653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->bsr_convention = bsr; 869753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->newline_convention = newline; 869853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->max_lookbehind = 0; 869953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->minlength = 0; 870053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->top_bracket = 0; 870153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->top_backref = 0; 870253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->name_entry_size = cb.name_entry_size; 870353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->name_count = cb.names_found; 870453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 870553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* The basic block is immediately followed by the name table, and the compiled 870653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscode follows after that. */ 870753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 870853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscodestart = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)) + 870953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis re->name_entry_size * re->name_count; 871053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 871153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Workspace is needed to remember information about numbered groups: whether a 871253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisgroup can match an empty string and what its fixed length is. 
This is done to 871353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisavoid the possibility of recursive references causing very long compile times 871453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhen checking these features. Unnumbered groups do not have this exposure since 871553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthey cannot be referenced. We use an indexed vector for this purpose. If there 871653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisare sufficiently few groups, it can be the c32workspace vector, as set up 871753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisabove. Otherwise we have to get/free a special vector. The vector must be 871853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisinitialized to zero. */ 871953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 872053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (cb.final_bracount >= C32_WORK_SIZE) 872153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 872253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb.groupinfo = ccontext->memctl.malloc( 872353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (cb.final_bracount + 1)*sizeof(uint32_t), ccontext->memctl.memory_data); 872453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (cb.groupinfo == NULL) 872553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 872653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR21; 872753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto HAD_ERROR; 872853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 872953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 873053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismemset(cb.groupinfo, 0, (cb.final_bracount + 1) * sizeof(uint32_t)); 873153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 873253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Update the compile data block for the actual 
compile. The starting points of 873353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe name/number translation table and of the code are passed around in the 873453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompile data block. The start/end pattern and initial options are already set 873553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfrom the pre-compile phase, as is the name_entry_size field. Reset the bracket 873653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscount and the names_found field. */ 873753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 873853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.parens_depth = 0; 873953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.assert_depth = 0; 874053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.bracount = 0; 874153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.max_lookbehind = 0; 874253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)); 874353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.start_code = codestart; 874453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.iscondassert = FALSE; 874553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.req_varyopt = 0; 874653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.had_accept = FALSE; 874753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.had_pruneorskip = FALSE; 874853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.check_lookbehind = FALSE; 874953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.open_caps = NULL; 8750f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8751f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* If any named groups were found, create the name/number table from the list 875253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscreated in the pre-pass. 
*/ 8753f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 875453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (cb.names_found > 0) 8755f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 875653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int i = cb.names_found; 875753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis named_group *ng = cb.named_groups; 875853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cb.names_found = 0; 8759f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich for (; i > 0; i--, ng++) 876053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis add_name_to_table(&cb, ng->name, ng->length, ng->number); 8761f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8762f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8763f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Set up a starting, non-extracting bracket, then compile the expression. On 8764f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevicherror, errorcode will be set non-zero, so we don't need to look at the result 8765f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichof the function here. 
*/ 8766f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 876753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisptr = pattern + skipatstart; 876853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscode = (PCRE2_UCHAR *)codestart; 8769f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*code = OP_BRA; 877053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis(void)compile_regex(re->overall_options, &code, &ptr, &errorcode, FALSE, FALSE, 877153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 0, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, NULL); 8772f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 877353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->top_bracket = cb.bracount; 877453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->top_backref = cb.top_backref; 877553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->max_lookbehind = cb.max_lookbehind; 8776f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 877753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (cb.had_accept) 877853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 877953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcu = 0; /* Must disable after (*ACCEPT) */ 878053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis reqcuflags = REQ_NONE; 878153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 8782f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 87838b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis/* Fill in the final opcode and check for disastrous overflow. 
If no overflow, 87848b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisbut the estimated length exceeds the really used length, adjust the value of 87858b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisre->blocksize, and if valgrind support is configured, mark the extra allocated 87868b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskismemory as unaddressable, so that any out-of-bound reads can be detected. */ 8787f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8788f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*code++ = OP_END; 878953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisusedlength = code - codestart; 879053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (usedlength > length) errorcode = ERR23; else 879153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 879253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis re->blocksize -= CU2BYTES(length - usedlength); 8793f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#ifdef SUPPORT_VALGRIND 879453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis VALGRIND_MAKE_MEM_NOACCESS(code, CU2BYTES(length - usedlength)); 8795f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 879653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 879753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 879853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Scan the pattern for recursion/subroutine calls and convert the group 879953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnumbers into offsets. Maintain a small cache so that repeated groups containing 880053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisrecursions are efficiently handled. 
*/ 8801f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 880253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define RSCAN_CACHE_SIZE 8 8803f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 880453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (errorcode == 0 && cb.had_recurse) 8805f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 880653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *rcode; 880753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR rgroup; 880853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int ccount = 0; 880953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int start = RSCAN_CACHE_SIZE; 881053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recurse_cache rc[RSCAN_CACHE_SIZE]; 881153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 881253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (rcode = (PCRE2_UCHAR *)find_recurse(codestart, utf); 881353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis rcode != NULL; 881453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis rcode = (PCRE2_UCHAR *)find_recurse(rcode + 1 + LINK_SIZE, utf)) 8815f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 881653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int i, p, recno; 88170ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 881853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis recno = (int)GET(rcode, 1); 881953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (recno == 0) rgroup = codestart; else 88200ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes { 882153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_SPTR search_from = codestart; 882253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis rgroup = NULL; 882353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (i = 0, p = start; i < ccount; i++, p = (p + 1) & 7) 882453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 
882553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (recno == rc[p].recno) 882653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 882753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis rgroup = rc[p].group; 882853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 882953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 88300ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes 883153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Group n+1 must always start to the right of group n, so we can save 883253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis search time below when the new group number is greater than any of the 883353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis previously found groups. */ 883453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 883553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (recno > rc[p].recno) search_from = rc[p].group; 883653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 883753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 883853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (rgroup == NULL) 883953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 884053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis rgroup = PRIV(find_bracket)(search_from, utf, recno); 884153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (rgroup == NULL) 884253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 884353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR53; 884453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis break; 884553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 884653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (--start < 0) start = RSCAN_CACHE_SIZE - 1; 884753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis rc[start].recno = recno; 884853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis rc[start].group = rgroup; 
884953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (ccount < RSCAN_CACHE_SIZE) ccount++; 885053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 8851f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 885253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 885353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PUT(rcode, 1, rgroup - codestart); 8854f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8855f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8856f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 885753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* In rare debugging situations we sometimes need to look at the compiled code 885853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisat this stage. */ 8859f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 886053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef CALL_PRINTINT 886153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispcre2_printint(re, stderr, TRUE); 886253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfprintf(stderr, "Length=%lu Used=%lu\n", length, usedlength); 886353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif 8864f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 886553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* After a successful compile, give an error if there's back reference to a 886653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnon-existent capturing subpattern. Then, unless disabled, check whether any 886753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissingle character iterators can be auto-possessified. The function overwrites 886853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe appropriate opcode values, so the type of the pointer must be cast. 
NOTE: 886953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe intermediate variable "temp" is used in this code because at least one 887053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompiler gives a warning about loss of "const" attribute if the cast 887153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis(PCRE2_UCHAR *)codestart is used directly in the function call. */ 8872f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 887353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (errorcode == 0) 8874f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 887553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (re->top_backref > re->top_bracket) errorcode = ERR15; 887653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if ((re->overall_options & PCRE2_NO_AUTO_POSSESS) == 0) 887753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 887853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart; 887953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80; 888053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 8881f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8882f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8883f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* If there were any lookbehind assertions that contained OP_RECURSE 8884f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich(recursions or subroutine calls), a flag is set for them to be checked here, 8885f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbecause they may contain forward references. Actual recursions cannot be fixed 8886f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichlength, but subroutine calls can. It is done like this so that those without 8887f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichOP_RECURSE that are not fixed length get a diagnosic with a useful offset. 
The 8888f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichexceptional ones forgo this. We scan the pattern to check that they are fixed 8889f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichlength, and set their lengths. */ 8890f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 889153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (errorcode == 0 && cb.check_lookbehind) 8892f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 889353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *cc = (PCRE2_UCHAR *)codestart; 8894f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8895f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich /* Loop, searching for OP_REVERSE items, and process those that do not have 8896f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich their length set. (Actually, it will also re-process any that have a length 8897f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich of zero, but that is a pathological case, and it does no harm.) When we find 889853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis one, we temporarily terminate the branch it is in while we scan it. Note that 889953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis calling find_bracket() with a negative group number returns a pointer to the 890053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis OP_REVERSE item, not the actual lookbehind. 
*/ 8901f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 890253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis for (cc = (PCRE2_UCHAR *)PRIV(find_bracket)(codestart, utf, -1); 8903f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cc != NULL; 890453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis cc = (PCRE2_UCHAR *)PRIV(find_bracket)(cc, utf, -1)) 8905f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 8906f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (GET(cc, 1) == 0) 8907f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 8908f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int fixed_length; 890953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int count = 0; 891053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PCRE2_UCHAR *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE); 8911f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich int end_op = *be; 8912f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *be = OP_END; 891353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis fixed_length = find_fixedlength(cc, utf, TRUE, &cb, NULL, &count); 8914f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich *be = end_op; 8915f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich if (fixed_length < 0) 8916f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 891753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = fixed_length_errors[-fixed_length]; 8918f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 8919f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 892053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (fixed_length > cb.max_lookbehind) cb.max_lookbehind = fixed_length; 8921f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich PUT(cc, 1, fixed_length); 8922f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8923f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich cc += 1 + LINK_SIZE; 8924f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick 
Kralevich } 892553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 892653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* The previous value of the maximum lookbehind was transferred to the 892753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis compiled regex block above. We could have updated this value in the loop 892853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis above, but keep the two values in step, just in case some later code below 892953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis uses the cb value. */ 893053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 893153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis re->max_lookbehind = cb.max_lookbehind; 8932f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8933f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 893453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Failed to compile, or error while post-processing. Earlier errors get here 893553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisvia the dreaded goto. 
*/ 8936f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 8937f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichif (errorcode != 0) 8938f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 893953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis HAD_ERROR: 894053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *erroroffset = (int)(ptr - pattern); 894153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis HAD_UTF_ERROR: 894253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis *errorptr = errorcode; 894353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis pcre2_code_free(re); 894453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis re = NULL; 894553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto EXIT; 8946f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8947f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 894853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Successful compile. If the anchored option was not passed, set it if 894953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswe can determine that the pattern is anchored by virtue of ^ characters or \A 895053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisor anything else, such as starting with non-atomic .* when DOTALL is set and 895153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthere are no occurrences of *PRUNE or *SKIP (though there is an option to 895253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisdisable this case). 
*/ 895353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 895453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((re->overall_options & PCRE2_ANCHORED) == 0 && 895553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis is_anchored(codestart, 0, &cb, 0)) 895653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis re->overall_options |= PCRE2_ANCHORED; 8957f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 895853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* If the pattern is still not anchored and we do not have a first code unit, 895953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissee if there is one that is asserted (these are not saved during the compile 896053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbecause they can cause conflicts with actual literals that follow). This code 896153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisneed not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would 896253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscreate will not be used. */ 8963f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 896453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0) 8965f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 896653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags < 0) 896753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis firstcu = find_firstassertedcu(codestart, &firstcuflags, FALSE); 8968f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 896953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Save the data for a first code unit. 
*/ 8970f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 897153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcuflags >= 0) 897253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 897353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis re->first_codeunit = firstcu; 897453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis re->flags |= PCRE2_FIRSTSET; 8975f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 897653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Handle caseless first code units. */ 8977f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 897853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((firstcuflags & REQ_CASELESS) != 0) 8979f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 898053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (firstcu < 128 || (!utf && firstcu < 255)) 8981f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 898253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (cb.fcc[firstcu] != firstcu) re->flags |= PCRE2_FIRSTCASELESS; 8983f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 898453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 898553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* The first code unit is > 128 in UTF mode, or > 255 otherwise. In 898653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 8-bit UTF mode, codepoints in the range 128-255 are introductory code 898753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis points and cannot have another case. In 16-bit and 32-bit modes, we can 898853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis check wide characters when UTF (and therefore UCP) is supported. 
*/ 898953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 899053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8 899153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (firstcu <= MAX_UTF_CODE_POINT && 899253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis UCD_OTHERCASE(firstcu) != firstcu) 899353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis re->flags |= PCRE2_FIRSTCASELESS; 8994f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 899553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 8996f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 8997f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 899853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* When there is no first code unit, see if we can set the PCRE2_STARTLINE 899953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis flag. This is helpful for multiline matches when all branches start with ^ 900053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis and also when all branches start with non-atomic .* for non-DOTALL matches 900153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis when *PRUNE and SKIP are not present. (There is an option that disables this 900253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis case.) */ 9003f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 900453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (is_startline(codestart, 0, &cb, 0)) re->flags |= PCRE2_STARTLINE; 900553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 9006f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 900753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Handle the "required code unit", if one is set. In the case of an anchored 900853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispattern, do this only if it follows a variable length item in the pattern. 
900953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisAgain, skip this if PCRE2_NO_START_OPTIMIZE is set. */ 9010f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 901153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (reqcuflags >= 0 && 901253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0 || 901353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis (reqcuflags & REQ_VARY) != 0)) 9014f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 901553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis re->last_codeunit = reqcu; 901653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis re->flags |= PCRE2_LASTSET; 9017f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 901853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis /* Handle caseless required code units as for first code units (above). */ 9019f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 902053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if ((reqcuflags & REQ_CASELESS) != 0) 902153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 902253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (reqcu < 128 || (!utf && reqcu < 255)) 902353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 902453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS; 902553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 902653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8 902753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu) 902853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis re->flags |= PCRE2_LASTCASELESS; 9029f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif 903053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 
 9031f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 9032f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 9033f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Check for a pattern that can match an empty string, so that this information 9034f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichcan be provided to applications. */ 9035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 9036f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdo 9037f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 903853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int count = 0; 903953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis int rc = could_be_empty_branch(codestart, code, utf, &cb, TRUE, NULL, &count); 904053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (rc < 0) 904153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 904253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR86; 904353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto HAD_ERROR; 904453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 904553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis if (rc > 0) 9046f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich { 904753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis re->flags |= PCRE2_MATCH_EMPTY; 9048f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich break; 9049f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 9050f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich codestart += GET(codestart, 1); 9051f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich } 9052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichwhile (*codestart == OP_ALT); 9053f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 905453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Finally, unless PCRE2_NO_START_OPTIMIZE is set, study the compiled pattern 905553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisto set up information such as a 
bitmap of starting code units and a minimum 905653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismatching length. */ 905753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 905853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && 905953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis PRIV(study)(re) != 0) 906053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis { 906153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis errorcode = ERR31; 906253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis goto HAD_ERROR; 906353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis } 906453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 906553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Control ends up here in all cases. If memory was obtained for a 906653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiszero-terminated copy of the pattern, remember to free it before returning. Also 906753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfree the list of named groups if a larger one had to be obtained, and likewise 906853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe group information vector. 
*/ 906953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 907053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisEXIT: 907153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (copied_pattern != stack_copied_pattern) 907253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ccontext->memctl.free(copied_pattern, ccontext->memctl.memory_data); 907353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (cb.named_group_list_size > NAMED_GROUP_LIST_SIZE) 907453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ccontext->memctl.free((void *)cb.named_groups, ccontext->memctl.memory_data); 907553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (cb.groupinfo != c32workspace) 907653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis ccontext->memctl.free((void *)cb.groupinfo, ccontext->memctl.memory_data); 907753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis 907853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn re; /* Will be NULL after an error */ 9079f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich} 9080f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich 908153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* End of pcre2_compile.c */ 9082