1f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
2f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*      Perl-Compatible Regular Expressions       *
3f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
4f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* PCRE is a library of functions to support regular expressions whose syntax
6f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichand semantics are as close as possible to those of the Perl 5 language.
7f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                       Written by Philip Hazel
953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     Original API code Copyright (c) 1997-2012 University of Cambridge
1053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis         New API code Copyright (c) 2016 University of Cambridge
11f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
12f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich-----------------------------------------------------------------------------
13f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichRedistribution and use in source and binary forms, with or without
14f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichmodification, are permitted provided that the following conditions are met:
15f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
16f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    * Redistributions of source code must retain the above copyright notice,
17f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      this list of conditions and the following disclaimer.
18f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
19f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    * Redistributions in binary form must reproduce the above copyright
20f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      notice, this list of conditions and the following disclaimer in the
21f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      documentation and/or other materials provided with the distribution.
22f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
23f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    * Neither the name of the University of Cambridge nor the names of its
24f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      contributors may be used to endorse or promote products derived from
25f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      this software without specific prior written permission.
26f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
27f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichPOSSIBILITY OF SUCH DAMAGE.
38f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich-----------------------------------------------------------------------------
39f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
40f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
41f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
42f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#ifdef HAVE_CONFIG_H
43f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#include "config.h"
44f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
45f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
/* These names are defined ahead of the pcre2_internal.h include that follows,
presumably because macros in that header expand to them - confirm before moving
or renaming any of them. */
#define NLBLOCK cb             /* Block containing newline information */
#define PSSTART start_pattern  /* Field containing processed string start */
#define PSEND   end_pattern    /* Field containing processed string end */
49f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#include "pcre2_internal.h"
51f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* In rare error cases debugging might require calling pcre2_printint(). */
53f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
/* Disabled by default: nothing between "#if 0" and its "#endif" is compiled.
When enabled, PRINTABLE() is defined first (with a separate range for EBCDIC
builds), then pcre2_printint.c is textually included and CALL_PRINTINT is
defined. */
#if 0
#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif
#include "pcre2_printint.c"
#define CALL_PRINTINT
#endif
63f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* There are a few things that vary with different code unit sizes. Handle them
6553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisby defining macros in order to minimize #if usage. */
66f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
/* Per-width selections. XDIGIT(c) yields the xdigitab[] entry for c; for the
wider code units, a value that fails MAX_255() is mapped straight to 0xff
instead of indexing the table. STRING_UTFn_RIGHTPAR expands to two
comma-separated items: the width-specific string macro and a count (presumably
that string's length - confirm against its definition). */

#if PCRE2_CODE_UNIT_WIDTH == 8
#define XDIGIT(c)                xdigitab[c]
#define STRING_UTFn_RIGHTPAR     STRING_UTF8_RIGHTPAR, 5

#elif PCRE2_CODE_UNIT_WIDTH == 16
#define XDIGIT(c)                (MAX_255(c)? xdigitab[c] : 0xff)
#define STRING_UTFn_RIGHTPAR     STRING_UTF16_RIGHTPAR, 6

#else  /* 32-bit */
#define XDIGIT(c)                (MAX_255(c)? xdigitab[c] : 0xff)
#define STRING_UTFn_RIGHTPAR     STRING_UTF32_RIGHTPAR, 6
#endif
81f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
/* Forward declarations (prototypes only; the bodies appear later in this file)
to allow mutual recursion */

static unsigned int
  add_list_to_class(uint8_t *, PCRE2_UCHAR **, uint32_t, compile_block *,
    const uint32_t *, unsigned int);

static BOOL
  compile_regex(uint32_t, PCRE2_UCHAR **, PCRE2_SPTR *, int *, BOOL, BOOL,
    uint32_t, int, uint32_t *, int32_t *, uint32_t *, int32_t *,
    branch_chain *, compile_block *, size_t *);
92f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
93f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
94f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
95f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
96f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*      Code parameters and static tables         *
97f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
98f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
9953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This value specifies the size of stack workspace, which is used in different
10053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisways in the different pattern scans. The group-identifying pre-scan uses it to
10153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskishandle nesting, and needs it to be 16-bit aligned.
102f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
10353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisDuring the first compiling phase, when determining how much memory is required,
10453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe regex is partly compiled into this space, but the compiled parts are
10553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisdiscarded as soon as they can be, so that hopefully there will never be an
10653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisoverrun. The code does, however, check for an overrun, which can occur for
10753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispathological patterns. The size of the workspace depends on LINK_SIZE because
10853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe length of compiled items varies with this.
109f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
11053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisIn the real compile phase, the workspace is used for remembering data about
11153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnumbered groups, provided there are not too many of them (if there are, extra
11253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismemory is acquired). For this phase the memory must be 32-bit aligned. Having
11353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisdefined the size in code units, we set up C32_WORK_SIZE as the number of
11453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiselements in the 32-bit vector. */
11553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
#define COMPILE_WORK_SIZE (2048*LINK_SIZE)   /* Size in code units */

/* The same amount of storage expressed as a count of uint32_t elements. The
division is integer division, so any remainder is dropped. */
#define C32_WORK_SIZE \
  ((COMPILE_WORK_SIZE * sizeof(PCRE2_UCHAR))/sizeof(uint32_t))

/* The overrun tests check for a slightly smaller size so that they detect the
overrun before it actually does run off the end of the data block. */

/* NOTE(review): the unit of this margin is fixed by the call sites, which are
not visible here - confirm before changing it. */
#define WORK_SIZE_SAFETY_MARGIN (100)
125f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
12653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This value determines the size of the initial vector that is used for
12753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisremembering named groups during the pre-compile. It is allocated on the stack,
12853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbut if it is too small, it is expanded, in a similar way to the workspace. The
12953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisvalue is the number of slots in the list. */
13053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
#define NAMED_GROUP_LIST_SIZE  20   /* Slots in the initial on-stack list */
13253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
13353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* The original PCRE required patterns to be zero-terminated, and it simplifies
13453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe compiling code if it is guaranteed that there is a zero code unit at the
13553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisend of the pattern, because this means that tests for coding sequences such as
13653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis(*SKIP) or even just (?<= can check a sequence of code units without having to
13753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiskeep checking for the end of the pattern. The new PCRE2 API allows zero code
13853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisunits within patterns if a positive length is given, but in order to keep most
13953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisof the compiling code as it was, we copy such patterns and add a zero on the
14053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisend. This value determines the size of space on the stack that is used if the
14153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispattern fits; if not, heap memory is used. */
14253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
#define COPIED_PATTERN_SIZE 1024   /* Size of the on-stack pattern copy space */
14453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
14553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Maximum length value to check against when making sure that the variable
14653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthat holds the compiled pattern length does not overflow. We make it a bit less
14753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthan INT_MAX to allow for adding in group terminating bytes, so that we don't
14853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskishave to check them every time. */
14953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
#define OFLOW_MAX (INT_MAX - 20)   /* 20 below INT_MAX leaves headroom */
15153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
/* Macro for setting individual bits in class bitmaps: it sets bit (b) in the
byte array (a), eight bits per byte, with bit 0 of each byte the least
significant. It took some experimenting to figure out how to stop gcc 5.3.0
from warning with -Wconversion. This version gets a warning:

  #define SETBIT(a,b) a[(b)/8] |= (uint8_t)(1 << ((b)&7))

Let's hope the apparently less efficient version isn't actually so bad if the
compiler is clever with identical subexpressions.

Both arguments and the whole expansion are parenthesized, so an expression
argument such as "p + n" indexes the intended object and the macro can be used
inside a larger expression. Each argument is evaluated more than once, so
neither may have side effects. */

#define SETBIT(a,b) ((a)[(b)/8] = (uint8_t)((a)[(b)/8] | (1 << ((b)&7))))
16253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
/* Private flags added to firstcu and reqcu. These two are single-bit values
that can be combined. */

#define REQ_CASELESS    (1 << 0)        /* Indicates caselessness */
#define REQ_VARY        (1 << 1)        /* reqcu followed non-literal item */

/* Negative values for the firstcu and reqcu flags. Being negative, they are
distinct from every combination of the two bit flags above. */

#define REQ_UNSET       (-2)            /* Not yet found anything */
#define REQ_NONE        (-1)            /* Found not fixed char */
17053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
/* These flags are used in the groupinfo vector. The four flag values occupy
bits 31 down to 28; GI_FIXED_LENGTH_MASK selects the low 16 bits.
NOTE(review): presumably the masked field holds a group's fixed length -
confirm against the code that fills the vector. */

#define GI_SET_COULD_BE_EMPTY  0x80000000u
#define GI_COULD_BE_EMPTY      0x40000000u
#define GI_NOT_FIXED_LENGTH    0x20000000u
#define GI_SET_FIXED_LENGTH    0x10000000u
#define GI_FIXED_LENGTH_MASK   0x0000ffffu
17853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
/* A variable with this bit set holds a number of code units rather than an
actual code point. The bit is greater than any UTF value, so the two kinds of
content cannot be mistaken for each other. */

#define UTF_LENGTH     0x10000000L
183f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
/* This simple test for a decimal digit works for both ASCII/Unicode and EBCDIC
and is fast (a good compiler can turn it into a subtraction and unsigned
comparison). The argument appears twice in the expansion, so it must not have
side effects. */

#define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9)
18953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
/* Table to identify hex digits. The tables in chartables are dependent on the
locale, and may mark arbitrary characters as digits. We want to recognize only
0-9, a-f, and A-F as hex digits, which is why we have a private table here. It
costs 256 bytes, but it is a lot faster than doing character value tests (at
least in some simple cases I timed), and in some applications one wants PCRE to
compile efficiently as well as match efficiently. The value in the table is
the binary hex digit value, or 0xff for non-hex digits. */
19753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
19853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
19953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisUTF-8 mode. */
200f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
201f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#ifndef EBCDIC
/* 256 entries, eight per row, indexed by character value. The comment on each
row gives the range of character values (or characters) that the row covers. */
static const uint8_t xdigitab[] =
  {
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*   0-  7 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*   8- 15 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  16- 23 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  24- 31 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*    - '  */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  ( - /  */
  0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, /*  0 - 7  */
  0x08,0x09,0xff,0xff,0xff,0xff,0xff,0xff, /*  8 - ?  */
  0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff, /*  @ - G  */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  H - O  */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  P - W  */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  X - _  */
  0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff, /*  ` - g  */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  h - o  */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  p - w  */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  x -127 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 128-135 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 136-143 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 144-151 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 152-159 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 160-167 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 168-175 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 176-183 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 184-191 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 192-199 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 200-207 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 208-215 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 216-223 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 224-231 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 232-239 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 240-247 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff};/* 248-255 */
23653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
23753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else
23853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
23953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. */
24053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
/* 256 entries, eight per row, indexed by EBCDIC character value. The first
part of each row comment gives the range covered; the trailing number on
alternate rows is the index of that row's first entry in hexadecimal. The hex
digit values sit at 0x81-0x86, 0xC1-0xC6 and 0xF0-0xF9. */
static const uint8_t xdigitab[] =
  {
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*   0-  7  0 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*   8- 15    */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  16- 23 10 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  24- 31    */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  32- 39 20 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  40- 47    */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  48- 55 30 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  56- 63    */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*    - 71 40 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  72- |     */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  & - 87 50 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  88- 95    */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  - -103 60 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 104- ?     */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 112-119 70 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 120- "     */
  0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff, /* 128- g  80 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  h -143    */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 144- p  90 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  q -159    */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 160- x  A0 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  y -175    */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  ^ -183 B0 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /* 184-191    */
  0xff,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0xff, /*  { - G  C0 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  H -207    */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  } - P  D0 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  Q -223    */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  \ - X  E0 */
  0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, /*  Y -239    */
  0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, /*  0 - 7  F0 */
  0x08,0x09,0xff,0xff,0xff,0xff,0xff,0xff};/*  8 -255    */
27553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif  /* EBCDIC */
27653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
27753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
27853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Table for handling alphanumeric escaped characters. Positive returns are
27953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissimple data values; negative values are for special things like \d and so on.
28053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisZero means further processing is needed (for things like \x), or the escape is
28153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisinvalid. */
282f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
283f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* This is the "normal" table for ASCII systems or for EBCDIC systems running
28453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisin UTF-8 mode. It runs from '0' to 'z'. */
28553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
28653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifndef EBCDIC
/* First and last characters covered by the escapes[] table. */
#define ESCAPES_FIRST       CHAR_0
#define ESCAPES_LAST        CHAR_z

/* Subtracts 32 from its argument, which turns a lower-case ASCII letter into
the matching upper-case letter. The argument is parenthesized so that an
expression argument (for example one using ?: or |) is converted as a whole
instead of having the subtraction bind to its last operand only. */
#define UPPER_CASE(c)       ((c)-32)
290f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
/* One entry per character, in ASCII code order from '0' to 'z'. The comment at
the end of each row names the two characters that the row's entries belong to.
NOTE(review): the unnegated ESC_a, ESC_e, ESC_f, ESC_n, ESC_r and ESC_tee
entries are presumably the literal values of those control characters - confirm
against their definitions. */
static const short int escapes[] = {
     0,                       0,                          /* 0  1 */
     0,                       0,                          /* 2  3 */
     0,                       0,                          /* 4  5 */
     0,                       0,                          /* 6  7 */
     0,                       0,                          /* 8  9 */
     CHAR_COLON,              CHAR_SEMICOLON,             /* :  ; */
     CHAR_LESS_THAN_SIGN,     CHAR_EQUALS_SIGN,           /* <  = */
     CHAR_GREATER_THAN_SIGN,  CHAR_QUESTION_MARK,         /* >  ? */
     CHAR_COMMERCIAL_AT,      -ESC_A,                     /* @  A */
     -ESC_B,                  -ESC_C,                     /* B  C */
     -ESC_D,                  -ESC_E,                     /* D  E */
     0,                       -ESC_G,                     /* F  G */
     -ESC_H,                  0,                          /* H  I */
     0,                       -ESC_K,                     /* J  K */
     0,                       0,                          /* L  M */
     -ESC_N,                  0,                          /* N  O */
     -ESC_P,                  -ESC_Q,                     /* P  Q */
     -ESC_R,                  -ESC_S,                     /* R  S */
     0,                       0,                          /* T  U */
     -ESC_V,                  -ESC_W,                     /* V  W */
     -ESC_X,                  0,                          /* X  Y */
     -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,   /* Z  [ */
     CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,  /* \  ] */
     CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,            /* ^  _ */
     CHAR_GRAVE_ACCENT,       ESC_a,                      /* `  a */
     -ESC_b,                  0,                          /* b  c */
     -ESC_d,                  ESC_e,                      /* d  e */
     ESC_f,                   0,                          /* f  g */
     -ESC_h,                  0,                          /* h  i */
     0,                       -ESC_k,                     /* j  k */
     0,                       0,                          /* l  m */
     ESC_n,                   0,                          /* n  o */
     -ESC_p,                  0,                          /* p  q */
     ESC_r,                   -ESC_s,                     /* r  s */
     ESC_tee,                 0,                          /* t  u */
     -ESC_v,                  -ESC_w,                     /* v  w */
     0,                       0,                          /* x  y */
     -ESC_z                                               /* z */
};
331f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
332f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#else
333f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
/* This is the "abnormal" table for EBCDIC systems without UTF-8 support.
It runs from 'a' to '9'. For some minimal testing of EBCDIC features, the code
is sometimes compiled on an ASCII system. In this case, we must not use CHAR_a
because it is defined as 'a', which of course picks up the ASCII value.

ESCAPES_FIRST and ESCAPES_LAST give the code points of the first and last
table slots. UPPER_CASE(c) applies a fixed offset to c: +64 in a real EBCDIC
environment, -32 when testing on ASCII. Its argument is parenthesized so that
an expression argument (for example a conditional or a bitwise expression) is
evaluated as a unit before the offset is applied. */

#if 'a' == 0x81                    /* Check for a real EBCDIC environment */
#define ESCAPES_FIRST       CHAR_a
#define ESCAPES_LAST        CHAR_9
#define UPPER_CASE(c)       ((c)+64)
#else                              /* Testing in an ASCII environment */
#define ESCAPES_FIRST  ((unsigned char)'\x81')   /* EBCDIC 'a' */
#define ESCAPES_LAST   ((unsigned char)'\xf9')   /* EBCDIC '9' */
#define UPPER_CASE(c)  ((c)-32)
#endif
348f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
349f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic const short int escapes[] = {
35053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/*  80 */        ESC_a, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
351f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0,
3520ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes/*  90 */     0,     0, -ESC_k,       0,      0, ESC_n,      0, -ESC_p,
353f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,
354f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,-ESC_v, -ESC_w,      0,
355f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0,
356f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*  B0 */     0,     0,      0,       0,      0,     0,      0,      0,
357f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-',
358f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G,
359f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*  C8 */-ESC_H,     0,      0,       0,      0,     0,      0,      0,
360f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*  D0 */   '}',     0, -ESC_K,       0,      0,-ESC_N,      0, -ESC_P,
361f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*  D8 */-ESC_Q,-ESC_R,      0,       0,      0,     0,      0,      0,
362f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*  E0 */  '\\',     0, -ESC_S,       0,      0,-ESC_V, -ESC_W, -ESC_X,
363f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0,
364f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*  F0 */     0,     0,      0,       0,      0,     0,      0,      0,
36553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/*  F8 */     0,     0
366f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich};
3670ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
/* We also need a table of characters that may follow \c in an EBCDIC
environment for characters 0-31. The position of a character in this string
is the control value it stands for ('@' is at position 0, '_' at position 31).
The table is declared const because it is fixed lookup data.
NOTE(review): this assumes no code writes to the table - confirm against the
\c handling code. */

static const unsigned char ebcdic_escape_c[] = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
3720ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
37353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif   /* EBCDIC */
374f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
375f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
376f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Table of special "verbs" like (*PRUNE). This is a short table, so it is
377f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichsearched linearly. Put all the names into a single string, in order to reduce
378f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichthe number of relocations when a shared library is dynamically linked. The
379f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstring is built from string macros so that it works in UTF-8 mode on EBCDIC
380f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichplatforms. */
381f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
/* Describes one verb: how long its name is, and which opcode to use
depending on whether the verb is written with an argument. */

typedef struct verbitem {
  int len;       /* Number of characters in the verb name */
  int op;        /* Opcode when there is no argument; -1 means that an
                    argument is mandatory */
  int op_arg;    /* Opcode when there is an argument; -1 means that an
                    argument is not allowed */
} verbitem;
387f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
388f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic const char verbnames[] =
389f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  "\0"                       /* Empty name is a shorthand for MARK */
390f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  STRING_MARK0
391f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  STRING_ACCEPT0
392f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  STRING_COMMIT0
393f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  STRING_F0
394f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  STRING_FAIL0
395f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  STRING_PRUNE0
396f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  STRING_SKIP0
397f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  STRING_THEN;
398f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
399f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic const verbitem verbs[] = {
400f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  { 0, -1,        OP_MARK },
401f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  { 4, -1,        OP_MARK },
402f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  { 6, OP_ACCEPT, -1 },
403f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  { 6, OP_COMMIT, -1 },
404f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  { 1, OP_FAIL,   -1 },
405f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  { 4, OP_FAIL,   -1 },
406f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  { 5, OP_PRUNE,  OP_PRUNE_ARG },
407f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  { 4, OP_SKIP,   OP_SKIP_ARG  },
408f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  { 4, OP_THEN,   OP_THEN_ARG  }
409f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich};
410f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
411f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic const int verbcount = sizeof(verbs)/sizeof(verbitem);
412f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
413f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
414f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Substitutes for [[:<:]] and [[:>:]], which mean start and end of word in
415f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichanother regex library. */
416f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
41753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR sub_start_of_word[] = {
418f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK,
419f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w, CHAR_RIGHT_PARENTHESIS, '\0' };
420f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
42153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR sub_end_of_word[] = {
422f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_b, CHAR_LEFT_PARENTHESIS, CHAR_QUESTION_MARK,
423f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_LESS_THAN_SIGN, CHAR_EQUALS_SIGN, CHAR_BACKSLASH, CHAR_w,
424f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_RIGHT_PARENTHESIS, '\0' };
425f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
426f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
427f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Tables of names of POSIX character classes and their lengths. The names are
428f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichnow all in a single string, to reduce the number of relocations when a shared
429f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichlibrary is dynamically loaded. The list of lengths is terminated by a zero
430f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichlength entry. The first three must be alpha, lower, upper, as this is assumed
431f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichfor handling case independence. The indices for graph, print, and punct are
432f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichneeded, so identify them. */
433f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
434f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic const char posix_names[] =
435f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
436f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0
437f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
438f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  STRING_word0  STRING_xdigit;
439f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
/* Length of each POSIX class name, in the same order as the names. */
static const uint8_t posix_name_lengths[] = {
  5, 5, 5, 5,     /* alpha lower upper alnum */
  5, 5, 5, 5,     /* ascii blank cntrl digit */
  5, 5, 5, 5,     /* graph print punct space */
  4, 6,           /* word xdigit */
  0               /* A zero length ends the list */
};
442f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
/* Positions of graph, print, and punct in the list of POSIX class names. */
#define PC_GRAPH   8
#define PC_PRINT   9
#define PC_PUNCT  10
446f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
447f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
448f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Table of class bit maps for each POSIX class. Each class is formed from a
449f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbase map, with an optional addition or removal of another map. Then, for some
450f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichclasses, there is some additional tweaking: for [:blank:] the vertical space
451f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichcharacters are removed, and for [:alpha:] and [:alnum:] the underscore
452f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichcharacter is removed. The triples in the table consist of the base map offset,
453f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichsecond map offset or -1 if no second map, and a non-negative value for map
454f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichaddition or a negative value for map subtraction (if there are two maps). The
455f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichabsolute value of the third field has these meanings: 0 => no tweaking, 1 =>
456f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichremove vertical space characters, 2 => remove underscore. */
457f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
458f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic const int posix_class_maps[] = {
459f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cbit_word,  cbit_digit, -2,             /* alpha */
460f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cbit_lower, -1,          0,             /* lower */
461f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cbit_upper, -1,          0,             /* upper */
462f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cbit_word,  -1,          2,             /* alnum - word without underscore */
463f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cbit_print, cbit_cntrl,  0,             /* ascii */
464f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cbit_space, -1,          1,             /* blank - a GNU extension */
465f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cbit_cntrl, -1,          0,             /* cntrl */
466f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cbit_digit, -1,          0,             /* digit */
467f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cbit_graph, -1,          0,             /* graph */
468f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cbit_print, -1,          0,             /* print */
469f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cbit_punct, -1,          0,             /* punct */
470f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cbit_space, -1,          0,             /* space */
471f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cbit_word,  -1,          0,             /* word - a Perl extension */
472f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cbit_xdigit,-1,          0              /* xdigit */
473f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich};
474f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
47553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Table of substitutes for \d etc when PCRE2_UCP is set. They are replaced by
476f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichUnicode property escapes. */
477f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
47853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
47953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PNd[]  = {
480f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
481f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
48253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pNd[]  = {
483f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
484f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
48553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PXsp[] = {
486f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
487f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
48853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pXsp[] = {
489f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
490f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' };
49153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PXwd[] = {
492f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
493f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
49453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pXwd[] = {
495f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
496f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' };
497f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
49853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic PCRE2_SPTR substitutes[] = {
499f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_PNd,           /* \D */
500f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_pNd,           /* \d */
501f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_PXsp,          /* \S */   /* Xsp is Perl space, but from 8.34, Perl */
502f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_pXsp,          /* \s */   /* space and POSIX space are the same. */
503f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_PXwd,          /* \W */
504f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_pXwd           /* \w */
505f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich};
506f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
507f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* The POSIX class substitutes must be in the order of the POSIX class names,
508f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdefined above, and there are both positive and negative cases. NULL means no
509f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichgeneral substitute of a Unicode property escape (\p or \P). However, for some
510f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichPOSIX classes (e.g. graph, print, punct) a special property code is compiled
511f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdirectly. */
512f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
51353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pCc[] =  {
51453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
51553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  CHAR_C, CHAR_c, CHAR_RIGHT_CURLY_BRACKET, '\0' };
51653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pL[] =   {
517f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
518f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
51953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pLl[] =  {
520f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
521f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
52253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pLu[] =  {
523f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
524f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
52553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pXan[] = {
526f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
527f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
52853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_h[] =    {
529f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_h, '\0' };
53053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_pXps[] = {
531f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
532f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
53353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PCc[] =  {
53453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
53553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  CHAR_C, CHAR_c, CHAR_RIGHT_CURLY_BRACKET, '\0' };
53653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PL[] =   {
537f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
538f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
53953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PLl[] =  {
540f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
541f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' };
54253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PLu[] =  {
543f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
544f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' };
54553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PXan[] = {
546f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
547f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' };
54853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_H[] =    {
549f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_H, '\0' };
55053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR string_PXps[] = {
551f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET,
552f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' };
553f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
55453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic PCRE2_SPTR posix_substitutes[] = {
555f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_pL,            /* alpha */
556f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_pLl,           /* lower */
557f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_pLu,           /* upper */
558f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_pXan,          /* alnum */
559f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  NULL,                 /* ascii */
560f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_h,             /* blank */
56153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  string_pCc,           /* cntrl */
562f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_pNd,           /* digit */
563f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  NULL,                 /* graph */
564f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  NULL,                 /* print */
565f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  NULL,                 /* punct */
566f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_pXps,          /* space */   /* Xps is POSIX space, but from 8.34 */
567f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_pXwd,          /* word  */   /* Perl and POSIX space are the same */
568f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  NULL,                 /* xdigit */
569f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* Negated cases */
570f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_PL,            /* ^alpha */
571f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_PLl,           /* ^lower */
572f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_PLu,           /* ^upper */
573f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_PXan,          /* ^alnum */
574f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  NULL,                 /* ^ascii */
575f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_H,             /* ^blank */
57653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  string_PCc,           /* ^cntrl */
577f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_PNd,           /* ^digit */
578f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  NULL,                 /* ^graph */
579f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  NULL,                 /* ^print */
580f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  NULL,                 /* ^punct */
581f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_PXps,          /* ^space */  /* Xps is POSIX space, but from 8.34 */
582f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  string_PXwd,          /* ^word */   /* Perl and POSIX space are the same */
583f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  NULL                  /* ^xdigit */
584f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich};
58553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(PCRE2_UCHAR *))
58653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif  /* SUPPORT_UNICODE */
58753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
58853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Masks for checking option settings. */
58953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
/* All the option bits listed here, OR-ed together into a single mask. */
#define PUBLIC_COMPILE_OPTIONS \
  (PCRE2_ANCHORED            | PCRE2_ALLOW_EMPTY_CLASS | \
   PCRE2_ALT_BSUX            | PCRE2_ALT_CIRCUMFLEX    | \
   PCRE2_ALT_VERBNAMES       | PCRE2_AUTO_CALLOUT      | \
   PCRE2_CASELESS            | PCRE2_DOLLAR_ENDONLY    | \
   PCRE2_DOTALL              | PCRE2_DUPNAMES          | \
   PCRE2_EXTENDED            | PCRE2_FIRSTLINE         | \
   PCRE2_MATCH_UNSET_BACKREF | PCRE2_MULTILINE         | \
   PCRE2_NEVER_BACKSLASH_C   | PCRE2_NEVER_UCP         | \
   PCRE2_NEVER_UTF           | PCRE2_NO_AUTO_CAPTURE   | \
   PCRE2_NO_AUTO_POSSESS     | PCRE2_NO_DOTSTAR_ANCHOR | \
   PCRE2_NO_START_OPTIMIZE   | PCRE2_NO_UTF_CHECK      | \
   PCRE2_UCP                 | PCRE2_UNGREEDY          | \
   PCRE2_USE_OFFSET_LIMIT    | PCRE2_UTF)
59953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
60053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Compile time error code numbers. They are given names so that they can more
60153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiseasily be tracked. When a new number is added, the tables called eint1 and
60253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiseint2 in pcre2posix.c may need to be updated, and a new error text must be
60353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisadded to compile_error_texts in pcre2_error.c. */
60453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
60553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisenum { ERR0 = COMPILE_ERROR_BASE,
60653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,  ERR10,
60753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
60853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
60953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
61053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
61153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
61253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
61353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
61453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88 };
61553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
61653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Error codes that correspond to negative error codes returned by
61753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfind_fixedlength(). */
61853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
61953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic int fixed_length_errors[] =
620f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
62153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ERR0,    /* Not an error */
62253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ERR0,    /* Not an error; -1 is used for "process later" */
62353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ERR25,   /* Lookbehind is not fixed length */
62453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ERR36,   /* \C in lookbehind is not allowed */
62553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ERR87,   /* Lookbehind is too long */
62653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ERR86,   /* Pattern too complicated */
62753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ERR70    /* Internal error: unknown opcode encountered */
62853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  };
62953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
63053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This is a table of start-of-pattern options such as (*UTF) and settings such
63153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisas (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
63253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompatibility, (*UTFn) is supported in the relevant libraries, but (*UTF) is
63353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisgeneric and always supported. */
63453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
/* Kinds of start-of-pattern item. The kind determines how the "value" field of
a pso table entry is interpreted. */

enum {
  PSO_OPT,     /* Value is an option bit */
  PSO_FLG,     /* Value is a flag bit */
  PSO_NL,      /* Value is a newline type */
  PSO_BSR,     /* Value is a \R type */
  PSO_LIMM,    /* Read integer value for match limit */
  PSO_LIMR     /* Read integer value for recursion limit */
};
64153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
/* One entry in the table of start-of-pattern items. The field order matters:
the table entries are written as positional initializers. */

struct pso {
  const uint8_t *name;   /* Text of the item */
  uint16_t length;       /* Length of the text */
  uint16_t type;         /* One of the PSO_xxx kinds */
  uint32_t value;        /* Meaning depends on the kind */
};

typedef struct pso pso;
64853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
64953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* NB: STRING_UTFn_RIGHTPAR contains the length as well */
65053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
65153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic pso pso_list[] = {
65253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_UTFn_RIGHTPAR,                  PSO_OPT, PCRE2_UTF },
65353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_UTF_RIGHTPAR,                4, PSO_OPT, PCRE2_UTF },
65453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_UCP_RIGHTPAR,                4, PSO_OPT, PCRE2_UCP },
65553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_NOTEMPTY_RIGHTPAR,           9, PSO_FLG, PCRE2_NOTEMPTY_SET },
65653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,  17, PSO_FLG, PCRE2_NE_ATST_SET },
65753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR,   16, PSO_OPT, PCRE2_NO_AUTO_POSSESS },
65853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPT, PCRE2_NO_DOTSTAR_ANCHOR },
65953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_NO_JIT_RIGHTPAR,             7, PSO_FLG, PCRE2_NOJIT },
66053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_NO_START_OPT_RIGHTPAR,      13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
66153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_LIMIT_MATCH_EQ,             12, PSO_LIMM, 0 },
66253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_LIMIT_RECURSION_EQ,         16, PSO_LIMR, 0 },
66353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_CR_RIGHTPAR,                 3, PSO_NL,  PCRE2_NEWLINE_CR },
66453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_LF_RIGHTPAR,                 3, PSO_NL,  PCRE2_NEWLINE_LF },
66553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_CRLF_RIGHTPAR,               5, PSO_NL,  PCRE2_NEWLINE_CRLF },
66653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_ANY_RIGHTPAR,                4, PSO_NL,  PCRE2_NEWLINE_ANY },
66753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_ANYCRLF_RIGHTPAR,            8, PSO_NL,  PCRE2_NEWLINE_ANYCRLF },
66853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_BSR_ANYCRLF_RIGHTPAR,       12, PSO_BSR, PCRE2_BSR_ANYCRLF },
66953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  { (uint8_t *)STRING_BSR_UNICODE_RIGHTPAR,       12, PSO_BSR, PCRE2_BSR_UNICODE }
670f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich};
671f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
672f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* This table is used when converting repeating opcodes into possessified
673f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichversions as a result of an explicit possessive quantifier such as ++. A zero
674f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichvalue means there is no possessified version - in those cases the item in
675f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichquestion must be wrapped in ONCE brackets. The table is truncated at OP_CALLOUT
676f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbecause all relevant opcodes are less than that. */
677f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
67853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const uint8_t opcode_possessify[] = {
679f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0 - 15  */
680f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 16 - 31 */
681f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
682f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0,                       /* NOTI */
683f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_POSSTAR, 0,           /* STAR, MINSTAR */
684f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_POSPLUS, 0,           /* PLUS, MINPLUS */
685f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_POSQUERY, 0,          /* QUERY, MINQUERY */
686f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_POSUPTO, 0,           /* UPTO, MINUPTO */
687f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0,                       /* EXACT */
688f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0, 0, 0, 0,              /* POS{STAR,PLUS,QUERY,UPTO} */
689f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
690f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_POSSTARI, 0,          /* STARI, MINSTARI */
691f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_POSPLUSI, 0,          /* PLUSI, MINPLUSI */
692f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_POSQUERYI, 0,         /* QUERYI, MINQUERYI */
693f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_POSUPTOI, 0,          /* UPTOI, MINUPTOI */
694f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0,                       /* EXACTI */
695f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0, 0, 0, 0,              /* POS{STARI,PLUSI,QUERYI,UPTOI} */
696f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
697f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_NOTPOSSTAR, 0,        /* NOTSTAR, NOTMINSTAR */
698f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_NOTPOSPLUS, 0,        /* NOTPLUS, NOTMINPLUS */
699f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_NOTPOSQUERY, 0,       /* NOTQUERY, NOTMINQUERY */
700f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_NOTPOSUPTO, 0,        /* NOTUPTO, NOTMINUPTO */
701f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0,                       /* NOTEXACT */
702f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0, 0, 0, 0,              /* NOTPOS{STAR,PLUS,QUERY,UPTO} */
703f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
704f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_NOTPOSSTARI, 0,       /* NOTSTARI, NOTMINSTARI */
705f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_NOTPOSPLUSI, 0,       /* NOTPLUSI, NOTMINPLUSI */
706f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_NOTPOSQUERYI, 0,      /* NOTQUERYI, NOTMINQUERYI */
707f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_NOTPOSUPTOI, 0,       /* NOTUPTOI, NOTMINUPTOI */
708f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0,                       /* NOTEXACTI */
709f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0, 0, 0, 0,              /* NOTPOS{STARI,PLUSI,QUERYI,UPTOI} */
710f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
711f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_TYPEPOSSTAR, 0,       /* TYPESTAR, TYPEMINSTAR */
712f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_TYPEPOSPLUS, 0,       /* TYPEPLUS, TYPEMINPLUS */
713f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_TYPEPOSQUERY, 0,      /* TYPEQUERY, TYPEMINQUERY */
714f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_TYPEPOSUPTO, 0,       /* TYPEUPTO, TYPEMINUPTO */
715f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0,                       /* TYPEEXACT */
716f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0, 0, 0, 0,              /* TYPEPOS{STAR,PLUS,QUERY,UPTO} */
717f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
718f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_CRPOSSTAR, 0,         /* CRSTAR, CRMINSTAR */
719f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_CRPOSPLUS, 0,         /* CRPLUS, CRMINPLUS */
720f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_CRPOSQUERY, 0,        /* CRQUERY, CRMINQUERY */
721f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  OP_CRPOSRANGE, 0,        /* CRRANGE, CRMINRANGE */
722f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0, 0, 0, 0,              /* CRPOS{STAR,PLUS,QUERY,RANGE} */
723f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
724f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0, 0, 0,                 /* CLASS, NCLASS, XCLASS */
725f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0, 0,                    /* REF, REFI */
726f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0, 0,                    /* DNREF, DNREFI */
727f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  0, 0                     /* RECURSE, CALLOUT */
728f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich};
729f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
730f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
731f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
732f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
7338b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis*               Copy compiled code               *
7348b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis*************************************************/
7358b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
7368b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis/* Compiled JIT code cannot be copied, so the new compiled block has no
7378b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisassociated JIT data. */
7388b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
7398b979b2abae173bb836d8e85a842cfd00447d4beJanis DanisevskisPCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION
7408b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskispcre2_code_copy(const pcre2_code *code)
7418b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis{
7428b979b2abae173bb836d8e85a842cfd00447d4beJanis DanisevskisPCRE2_SIZE* ref_count;
7438b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskispcre2_code *newcode;
7448b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
7458b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisif (code == NULL) return NULL;
7468b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisnewcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data);
7478b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisif (newcode == NULL) return NULL;
7488b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskismemcpy(newcode, code, code->blocksize);
7498b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisnewcode->executable_jit = NULL;
7508b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
7518b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis/* If the code is one that has been deserialized, increment the reference count
7528b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisin the decoded tables. */
7538b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
7548b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisif ((code->flags & PCRE2_DEREF_TABLES) != 0)
7558b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis  {
7568b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis  ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
7578b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis  (*ref_count)++;
7588b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis  }
7598b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
7608b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisreturn newcode;
7618b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis}
7628b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
7638b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
7648b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
7658b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis/*************************************************
76653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*               Free compiled code               *
767f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
768f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
76953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
77053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispcre2_code_free(pcre2_code *code)
771f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
77253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SIZE* ref_count;
77353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
77453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (code != NULL)
775f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
77653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (code->executable_jit != NULL)
77753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    PRIV(jit_free)(code->executable_jit, &code->memctl);
77853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
77953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if ((code->flags & PCRE2_DEREF_TABLES) != 0)
78053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
78153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Decoded tables belong to the codes after deserialization, and they must
78253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    be freed when there are no more reference to them. The *ref_count should
78353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    always be > 0. */
78453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
78553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
78653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (*ref_count > 0)
78753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
78853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      (*ref_count)--;
78953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (*ref_count == 0)
79053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        code->memctl.free((void *)code->tables, code->memctl.memory_data);
79153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
79253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
79353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
79453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  code->memctl.free(code, code->memctl.memory_data);
795f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
796f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
797f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
798f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
799f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
800f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
80153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*        Insert an automatic callout point       *
802f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
803f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
80453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called when the PCRE2_AUTO_CALLOUT option is set, to insert
80553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscallout points before each pattern item.
80653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
80753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments:
80853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  code           current code pointer
80953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ptr            current pattern pointer
81053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb             general compile-time data
811f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
81253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:         new code pointer
813f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
814f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
81553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic PCRE2_UCHAR *
81653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisauto_callout(PCRE2_UCHAR *code, PCRE2_SPTR ptr, compile_block *cb)
817f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
81853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscode[0] = OP_CALLOUT;
81953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPUT(code, 1, ptr - cb->start_pattern);  /* Pattern offset */
82053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPUT(code, 1 + LINK_SIZE, 0);            /* Default length */
82153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscode[1 + 2*LINK_SIZE] = 255;
82253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn code + PRIV(OP_lengths)[OP_CALLOUT];
823f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
824f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
825f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
826f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
827f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
82853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*         Complete a callout item                *
829f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
830f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
83153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* A callout item contains the length of the next item in the pattern, which
83253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswe can't fill in till after we have reached the relevant point. This is used
83353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor both automatic and manual callouts.
834f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
835f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments:
83653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  previous_callout   points to previous callout item
83753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ptr                current pattern pointer
83853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb                 general compile-time data
839f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
84053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:             nothing
841f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
842f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
84353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic void
84453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscomplete_callout(PCRE2_UCHAR *previous_callout, PCRE2_SPTR ptr,
84553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  compile_block *cb)
846f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
8478b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskissize_t length = (size_t)(ptr - cb->start_pattern - GET(previous_callout, 1));
84853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPUT(previous_callout, 1 + LINK_SIZE, length);
849f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
850f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
851f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
852f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
853f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
85453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*        Find the fixed length of a branch       *
855f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
856f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
85753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Scan a branch and compute the fixed length of subject that will match it, if
85853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe length is fixed. This is needed for dealing with lookbehind assertions. In
85953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisUTF mode, the result is in code units rather than bytes. The branch is
86053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskistemporarily terminated with OP_END when this function is called.
861f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
86253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisThis function is called when a lookbehind assertion is encountered, so that if
86353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisit fails, the error message can point to the correct place in the pattern.
86453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisHowever, we cannot do this when the assertion contains subroutine calls,
86553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbecause they can be forward references. We solve this by remembering this case
86653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisand doing the check at the end; a flag specifies which mode we are running in.
867f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
86853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisLookbehind lengths are held in 16-bit fields and the maximum value is defined
86953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisas LOOKBEHIND_MAX.
87053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
87153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments:
87253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  code        points to the start of the pattern (the bracket)
87353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  utf         TRUE in UTF mode
87453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  atend       TRUE if called when the pattern is complete
87553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb          the "compile data" structure
87653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  recurses    chain of recurse_check to catch mutual recursion
87753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  countptr    pointer to counter, to catch over-complexity
87853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
87953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:   if non-negative, the fixed length,
88053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis             or -1 if an OP_RECURSE item was encountered and atend is FALSE
88153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis             or -2 if there is no fixed length,
8828b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis             or -3 if \C was encountered (in UTF mode only)
8838b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis             or -4 if length is too long
8848b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis             or -5 if regex is too complicated
8858b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis             or -6 if an unknown opcode was encountered (internal error)
886f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
887f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
/* Negative return values of find_fixedlength(). */

#define FFL_LATER          (-1)  /* OP_RECURSE seen and atend is FALSE */
#define FFL_NOTFIXED       (-2)  /* There is no fixed length */
#define FFL_BACKSLASHC     (-3)  /* \C was encountered (UTF mode only) */
#define FFL_TOOLONG        (-4)  /* Length is too long */
#define FFL_TOOCOMPLICATED (-5)  /* Regex is too complicated */
#define FFL_UNKNOWNOP      (-6)  /* Unknown opcode (internal error) */
89453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
895f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic int
89653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfind_fixedlength(PCRE2_UCHAR *code, BOOL utf, BOOL atend, compile_block *cb,
89753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  recurse_check *recurses, int *countptr)
898f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
8998b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisuint32_t length = 0xffffffffu;   /* Unset */
90053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t group = 0;
90153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t groupinfo = 0;
90253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisrecurse_check this_recurse;
9038b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisregister uint32_t branchlength = 0;
90453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister PCRE2_UCHAR *cc = code + 1 + LINK_SIZE;
905f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
90653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* If this is a capturing group, we may have the answer cached, but we can only
90753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuse this information if there are no (?| groups in the pattern, because
90853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisotherwise group numbers are not unique. */
909f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
91053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (*code == OP_CBRA || *code == OP_CBRAPOS || *code == OP_SCBRA ||
91153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *code == OP_SCBRAPOS)
91253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
91353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  group = GET2(cc, 0);
91453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cc += IMM2_SIZE;
91553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  groupinfo = cb->groupinfo[group];
91653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if ((cb->external_flags & PCRE2_DUPCAPUSED) == 0)
91753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
91853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if ((groupinfo & GI_NOT_FIXED_LENGTH) != 0) return FFL_NOTFIXED;
91953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if ((groupinfo & GI_SET_FIXED_LENGTH) != 0)
92053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      return groupinfo & GI_FIXED_LENGTH_MASK;
92153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
92253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
923f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
92453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* A large and/or complex regex can take too long to process. This can happen
92553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismore often when (?| groups are present in the pattern. */
926f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
92753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((*countptr)++ > 2000) return FFL_TOOCOMPLICATED;
928f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
92953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Scan along the opcodes for this branch. If we get to the end of the
93053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbranch, check the length against that of the other branches. */
931f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
93253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (;;)
933f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
93453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int d;
93553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  PCRE2_UCHAR *ce, *cs;
93653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  register PCRE2_UCHAR op = *cc;
937f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
93853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (branchlength > LOOKBEHIND_MAX) return FFL_TOOLONG;
93953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
94053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  switch (op)
941f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
94253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* We only need to continue for OP_CBRA (normal capturing bracket) and
94353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    OP_BRA (normal non-capturing bracket) because the other variants of these
94453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    opcodes are all concerned with unlimited repeated groups, which of course
94553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    are not of fixed length. */
946f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
94753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_CBRA:
94853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_BRA:
94953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ONCE:
95053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ONCE_NC:
95153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_COND:
95253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    d = find_fixedlength(cc, utf, atend, cb, recurses, countptr);
95353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (d < 0) return d;
9548b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    branchlength += (uint32_t)d;
95553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    do cc += GET(cc, 1); while (*cc == OP_ALT);
95653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    cc += 1 + LINK_SIZE;
957f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
958f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
95953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Reached end of a branch; if it's a ket it is the end of a nested call.
96053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    If it's ALT it is an alternation in a nested call. An ACCEPT is effectively
96153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    an ALT. If it is END it's the end of the outer call. All can be handled by
96253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    the same code. Note that we must not include the OP_KETRxxx opcodes here,
96353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    because they all imply an unlimited repeat. */
96453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
96553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ALT:
96653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_KET:
96753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_END:
96853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ACCEPT:
96953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ASSERT_ACCEPT:
9708b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    if (length == 0xffffffffu) length = branchlength;
97153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else if (length != branchlength) goto ISNOTFIXED;
97253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (*cc != OP_ALT)
973f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
97453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (group > 0)
975f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
9768b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis        groupinfo |= (uint32_t)(GI_SET_FIXED_LENGTH | length);
97753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cb->groupinfo[group] = groupinfo;
978f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
9798b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      return (int)length;
980f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
98153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    cc += 1 + LINK_SIZE;
98253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    branchlength = 0;
983f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
984f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
98553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* A true recursion implies not fixed length, but a subroutine call may
98653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    be OK. If the subroutine is a forward reference, we can't deal with
98753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    it until the end of the pattern, so return FFL_LATER. */
98853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
98953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_RECURSE:
99053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (!atend) return FFL_LATER;
99153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    cs = ce = (PCRE2_UCHAR *)cb->start_code + GET(cc, 1); /* Start subpattern */
99253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */
99353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (cc > cs && cc < ce) goto ISNOTFIXED;          /* Recursion */
99453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else   /* Check for mutual recursion */
99553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
99653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      recurse_check *r = recurses;
99753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
99853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (r != NULL) goto ISNOTFIXED;   /* Mutual recursion */
99953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
100053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    this_recurse.prev = recurses;
100153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    this_recurse.group = cs;
100253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    d = find_fixedlength(cs, utf, atend, cb, &this_recurse, countptr);
100353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (d < 0) return d;
10048b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    branchlength += (uint32_t)d;
100553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    cc += 1 + LINK_SIZE;
1006f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
1007f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
100853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Skip over assertive subpatterns. Note that we must increment cc by
100953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    1 + LINK_SIZE at the end, not by OP_length[*cc] because in a recursive
101053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    situation this assertion may be the one that is ultimately being checked
101153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    for having a fixed length, in which case its terminating OP_KET will have
101253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    been temporarily replaced by OP_END. */
1013f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
101453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ASSERT:
101553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ASSERT_NOT:
101653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ASSERTBACK:
101753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ASSERTBACK_NOT:
101853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    do cc += GET(cc, 1); while (*cc == OP_ALT);
101953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    cc += 1 + LINK_SIZE;
102053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
1021f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
102253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Skip over things that don't match chars */
1023f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
102453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_MARK:
102553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_PRUNE_ARG:
102653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_SKIP_ARG:
102753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_THEN_ARG:
102853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    cc += cc[1] + PRIV(OP_lengths)[*cc];
102953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
1030f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1031f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_CALLOUT:
1032f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_CIRC:
1033f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_CIRCM:
1034f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_CLOSE:
1035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_COMMIT:
1036f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_CREF:
103753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_FALSE:
103853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TRUE:
1039f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_DNCREF:
1040f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_DNRREF:
1041f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_DOLL:
1042f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_DOLLM:
1043f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_EOD:
1044f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_EODN:
1045f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_FAIL:
1046f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOT_WORD_BOUNDARY:
1047f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_PRUNE:
1048f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_REVERSE:
1049f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_RREF:
1050f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_SET_SOM:
1051f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_SKIP:
1052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_SOD:
1053f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_SOM:
1054f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_THEN:
1055f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_WORD_BOUNDARY:
1056f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    cc += PRIV(OP_lengths)[*cc];
1057f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
1058f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
105953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_CALLOUT_STR:
106053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    cc += GET(cc, 1 + 2*LINK_SIZE);
106153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
106253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
1063f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Handle literal characters */
1064f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1065f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_CHAR:
1066f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_CHARI:
1067f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOT:
1068f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTI:
1069f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    branchlength++;
1070f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    cc += 2;
107153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
1072f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1073f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
1074f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
1075f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1076f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Handle exact repetitions. The count is already in characters, but we
1077f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    need to skip over a multi-unit character in UTF mode.  */
1078f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1079f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_EXACT:
1080f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_EXACTI:
1081f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTEXACT:
1082f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTEXACTI:
10838b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    branchlength += GET2(cc,1);
1084f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    cc += 2 + IMM2_SIZE;
108553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
1086f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1087f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
1088f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
1089f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1090f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_TYPEEXACT:
1091f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    branchlength += GET2(cc,1);
1092f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP)
1093f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      cc += 2;
1094f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    cc += 1 + IMM2_SIZE + 1;
1095f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
1096f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1097f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Handle single-char matchers */
1098f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1099f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_PROP:
1100f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTPROP:
1101f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    cc += 2;
1102f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Fall through */
1103f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1104f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_HSPACE:
1105f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_VSPACE:
1106f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOT_HSPACE:
1107f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOT_VSPACE:
1108f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOT_DIGIT:
1109f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_DIGIT:
1110f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOT_WHITESPACE:
1111f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_WHITESPACE:
1112f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOT_WORDCHAR:
1113f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_WORDCHAR:
1114f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_ANY:
1115f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_ALLANY:
1116f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    branchlength++;
1117f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    cc++;
1118f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
1119f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
11208b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    /* The single-byte matcher isn't allowed. This only happens in UTF-8 or
11218b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    UTF-16 mode; otherwise \C is coded as OP_ALLANY. */
1122f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1123f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_ANYBYTE:
112453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    return FFL_BACKSLASHC;
1125f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1126f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Check a class for variable quantification */
1127f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1128f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_CLASS:
1129f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NCLASS:
113053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS
1131f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_XCLASS:
1132f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* The original code caused an unsigned overflow in 64 bit systems,
1133f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    so now we use a conditional statement. */
1134f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (op == OP_XCLASS)
1135f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      cc += GET(cc, 1);
1136f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    else
1137f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      cc += PRIV(OP_lengths)[OP_CLASS];
1138f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#else
1139f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    cc += PRIV(OP_lengths)[OP_CLASS];
1140f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
1141f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1142f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    switch (*cc)
1143f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
1144f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      case OP_CRSTAR:
1145f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      case OP_CRMINSTAR:
1146f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      case OP_CRPLUS:
1147f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      case OP_CRMINPLUS:
1148f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      case OP_CRQUERY:
1149f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      case OP_CRMINQUERY:
1150f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      case OP_CRPOSSTAR:
1151f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      case OP_CRPOSPLUS:
1152f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      case OP_CRPOSQUERY:
115353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      goto ISNOTFIXED;
1154f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1155f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      case OP_CRRANGE:
1156f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      case OP_CRMINRANGE:
1157f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      case OP_CRPOSRANGE:
115853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) goto ISNOTFIXED;
11598b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      branchlength += GET2(cc,1);
1160f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      cc += 1 + 2 * IMM2_SIZE;
1161f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      break;
1162f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1163f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      default:
1164f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      branchlength++;
1165f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
1166f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
1167f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1168f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Anything else is variable length */
1169f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1170f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_ANYNL:
1171f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_BRAMINZERO:
1172f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_BRAPOS:
1173f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_BRAPOSZERO:
1174f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_BRAZERO:
1175f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_CBRAPOS:
1176f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_EXTUNI:
1177f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_KETRMAX:
1178f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_KETRMIN:
1179f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_KETRPOS:
1180f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_MINPLUS:
1181f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_MINPLUSI:
1182f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_MINQUERY:
1183f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_MINQUERYI:
1184f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_MINSTAR:
1185f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_MINSTARI:
1186f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_MINUPTO:
1187f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_MINUPTOI:
1188f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTMINPLUS:
1189f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTMINPLUSI:
1190f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTMINQUERY:
1191f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTMINQUERYI:
1192f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTMINSTAR:
1193f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTMINSTARI:
1194f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTMINUPTO:
1195f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTMINUPTOI:
1196f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTPLUS:
1197f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTPLUSI:
1198f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTPOSPLUS:
1199f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTPOSPLUSI:
1200f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTPOSQUERY:
1201f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTPOSQUERYI:
1202f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTPOSSTAR:
1203f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTPOSSTARI:
1204f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTPOSUPTO:
1205f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTPOSUPTOI:
1206f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTQUERY:
1207f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTQUERYI:
1208f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTSTAR:
1209f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTSTARI:
1210f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTUPTO:
1211f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_NOTUPTOI:
1212f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_PLUS:
1213f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_PLUSI:
1214f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_POSPLUS:
1215f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_POSPLUSI:
1216f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_POSQUERY:
1217f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_POSQUERYI:
1218f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_POSSTAR:
1219f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_POSSTARI:
1220f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_POSUPTO:
1221f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_POSUPTOI:
1222f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_QUERY:
1223f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_QUERYI:
1224f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_REF:
1225f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_REFI:
1226f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_DNREF:
1227f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_DNREFI:
1228f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_SBRA:
1229f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_SBRAPOS:
1230f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_SCBRA:
1231f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_SCBRAPOS:
1232f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_SCOND:
1233f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_SKIPZERO:
1234f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_STAR:
1235f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_STARI:
1236f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_TYPEMINPLUS:
1237f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_TYPEMINQUERY:
1238f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_TYPEMINSTAR:
1239f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_TYPEMINUPTO:
1240f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_TYPEPLUS:
1241f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_TYPEPOSPLUS:
1242f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_TYPEPOSQUERY:
1243f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_TYPEPOSSTAR:
1244f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_TYPEPOSUPTO:
1245f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_TYPEQUERY:
1246f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_TYPESTAR:
1247f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_TYPEUPTO:
1248f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_UPTO:
1249f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case OP_UPTOI:
125053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    goto ISNOTFIXED;
1251f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1252f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Catch unrecognized opcodes so that when new ones are added they
1253f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    are not forgotten, as has happened in the past. */
1254f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1255f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    default:
125653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    return FFL_UNKNOWNOP;
125753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
125853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
125953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Control never gets here except by goto. */
126053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
126153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisISNOTFIXED:
126253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (group > 0)
126353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
126453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  groupinfo |= GI_NOT_FIXED_LENGTH;
126553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb->groupinfo[group] = groupinfo;
126653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
126753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn FFL_NOTFIXED;
126853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis}
126953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
127053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
127153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
127253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/*************************************************
127353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*      Find first significant op code            *
127453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/
127553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
127653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This is called by several functions that scan a compiled expression looking
127753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor a fixed first character, or an anchoring op code etc. It skips over things
127853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthat do not influence this. For some calls, it makes sense to skip negative
127953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisforward and all backward assertions, and also the \b assertion; for others it
128053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisdoes not.
128153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
128253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments:
128353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  code         pointer to the start of the group
128453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  skipassert   TRUE if certain assertions are to be skipped
128553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
128653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:       pointer to the first significant opcode
128753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/
128853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
128953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic const PCRE2_UCHAR*
129053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfirst_significant_code(PCRE2_SPTR code, BOOL skipassert)
129153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{
129253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (;;)
129353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
129453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  switch ((int)*code)
129553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
129653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ASSERT_NOT:
129753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ASSERTBACK:
129853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ASSERTBACK_NOT:
129953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (!skipassert) return code;
130053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    do code += GET(code, 1); while (*code == OP_ALT);
130153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    code += PRIV(OP_lengths)[*code];
130253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
130353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
130453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_WORD_BOUNDARY:
130553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOT_WORD_BOUNDARY:
130653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (!skipassert) return code;
130753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Fall through */
130853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
130953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_CALLOUT:
131053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_CREF:
131153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_DNCREF:
131253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_RREF:
131353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_DNRREF:
131453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_FALSE:
131553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TRUE:
131653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    code += PRIV(OP_lengths)[*code];
131753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
131853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
131953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_CALLOUT_STR:
132053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    code += GET(code, 1 + 2*LINK_SIZE);
132153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
132253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
132353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    default:
132453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    return code;
1325f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
1326f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
132753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Control never reaches here */
1328f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
1329f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1330f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1331f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
133253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/*************************************************
133353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*    Scan compiled branch for non-emptiness      *
133453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/
133553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
133653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function scans through a branch of a compiled pattern to see whether it
133753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscan match the empty string. It is called at the end of compiling to check the
133853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisentire pattern, and from compile_branch() when checking for an unlimited repeat
133953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisof a group that can match nothing. In the latter case it is called only when
134053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisdoing the real compile, not during the pre-compile that measures the size of
134153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe compiled pattern.
134253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
134353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisNote that first_significant_code() skips over backward and negative forward
134453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisassertions when its final argument is TRUE. If we hit an unclosed bracket, we
134553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn "empty" - this means we've struck an inner bracket whose current branch
134653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswill already have been scanned.
134753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
134853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments:
134953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  code        points to start of search
135053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  endcode     points to where to stop
135153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  utf         TRUE if in UTF mode
135253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb          compile data
135353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  atend       TRUE if being called to check an entire pattern
135453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  recurses    chain of recurse_check to catch mutual recursion
135553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  countptr    pointer to count to catch over-complicated pattern
135653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
135753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:      0 if what is matched cannot be empty
135853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              1 if what is matched could be empty
135953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis             -1 if the pattern is too complicated
136053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/
136153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
136253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define CBE_NOTEMPTY          0   /* could_be_empty_branch(): what is matched cannot be empty */
136353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define CBE_EMPTY             1   /* could_be_empty_branch(): what is matched could be empty */
136453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define CBE_TOOCOMPLICATED  (-1)  /* could_be_empty_branch(): pattern is too complicated */
136553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
136653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
136753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic int
136853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscould_be_empty_branch(PCRE2_SPTR code, PCRE2_SPTR endcode, BOOL utf,
136953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  compile_block *cb, BOOL atend, recurse_check *recurses, int *countptr)
137053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{
137153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t group = 0;
137253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t groupinfo = 0;
137353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister PCRE2_UCHAR c;
137453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisrecurse_check this_recurse;
137553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
137653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* If what we are checking has already been set as "could be empty", we know
137753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe answer. */
137853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
137953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (*code >= OP_SBRA && *code <= OP_SCOND) return CBE_EMPTY;
138053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
138153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* If this is a capturing group, we may have the answer cached, but we can only
138253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuse this information if there are no (?| groups in the pattern, because
138353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisotherwise group numbers are not unique. */
138453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
138553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((cb->external_flags & PCRE2_DUPCAPUSED) == 0 &&
138653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    (*code == OP_CBRA || *code == OP_CBRAPOS))
138753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
138853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  group = GET2(code, 1 + LINK_SIZE);
138953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  groupinfo = cb->groupinfo[group];
139053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if ((groupinfo & GI_SET_COULD_BE_EMPTY) != 0)
139153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    return ((groupinfo & GI_COULD_BE_EMPTY) != 0)? CBE_EMPTY : CBE_NOTEMPTY;
139253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
139353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
139453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* A large and/or complex regex can take too long to process, so each call
139553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbumps *countptr and we give up with CBE_TOOCOMPLICATED once it exceeds 1100.
139653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisThis happens more often when (?| groups are present and caching is disabled.
139753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisA cap of 1100 allows the test for more than 1023 capturing patterns to work. */
139853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
139953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((*countptr)++ > 1100) return CBE_TOOCOMPLICATED;
140053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
140153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Scan the opcodes for this branch. */
140253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
140353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
140453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     code < endcode;
140553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
140653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
140753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  PCRE2_SPTR ccode;
140853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
140953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  c = *code;
141053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
141153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Skip over forward assertions; the other assertions are skipped by
141253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  first_significant_code() with a TRUE final argument. */
141353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
141453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (c == OP_ASSERT)
141553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
141653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    do code += GET(code, 1); while (*code == OP_ALT);
141753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    c = *code;
141853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    continue;
141953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
142053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
142153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* For a recursion/subroutine call we can scan the recursion when this
142253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  function is called at the end, to check a complete pattern. Before then,
142353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  recursions just have the group number as their argument and in any case may
142453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  be forward references. In that situation, we return CBE_EMPTY, just in case.
142553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  It means that unlimited repeats of groups that contain recursions are always
142653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  treated as "could be empty" - which just adds a bit more processing time
142753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  because of the runtime check. */
142853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
142953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (c == OP_RECURSE)
143053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
143153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    PCRE2_SPTR scode, endgroup;
143253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    BOOL empty_branch;
1433f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
143453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (!atend) goto ISTRUE;
143553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    scode = cb->start_code + GET(code, 1);
143653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    endgroup = scode;
1437f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
143853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* We need to detect whether this is a recursive call, as otherwise there
143953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    will be an infinite loop. If it is a recursion, just skip over it. Simple
144053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    recursions are easily detected. For mutual recursions we keep a chain on
144153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    the stack. */
1442f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
144353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
144453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (code >= scode && code <= endgroup) continue;  /* Simple recursion */
144553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else
144653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
144753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      recurse_check *r = recurses;
144853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      for (r = recurses; r != NULL; r = r->prev)
144953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (r->group == scode) break;
145053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (r != NULL) continue;   /* Mutual recursion */
145153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
1452f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
145353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Scan the referenced group, remembering it on the stack chain to detect
145453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    mutual recursions. */
1455f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
145653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    empty_branch = FALSE;
145753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    this_recurse.prev = recurses;
145853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    this_recurse.group = scode;
1459f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
146053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    do
146153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
146253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      int rc = could_be_empty_branch(scode, endcode, utf, cb, atend,
146353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        &this_recurse, countptr);
146453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (rc < 0) return rc;
146553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (rc > 0)
146653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
146753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        empty_branch = TRUE;
146853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
146953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
147053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      scode += GET(scode, 1);
147153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
147253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    while (*scode == OP_ALT);
1473f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
147453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (!empty_branch) goto ISFALSE;  /* All branches are non-empty */
147553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    continue;
147653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
1477f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
147853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Groups with zero repeats can of course be empty; skip them. */
1479f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
148053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO ||
148153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c == OP_BRAPOSZERO)
1482f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
1483f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    code += PRIV(OP_lengths)[c];
148453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    do code += GET(code, 1); while (*code == OP_ALT);
148553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    c = *code;
148653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    continue;
1487f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
1488f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
148953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* A nested group that is already marked as "could be empty" can just be
149053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  skipped. */
1491f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
149253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (c == OP_SBRA  || c == OP_SBRAPOS ||
149353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c == OP_SCBRA || c == OP_SCBRAPOS)
1494f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
149553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    do code += GET(code, 1); while (*code == OP_ALT);
149653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    c = *code;
149753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    continue;
1498f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
1499f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
150053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* For other groups, scan the branches. */
1501f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
150253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (c == OP_BRA  || c == OP_BRAPOS ||
150353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c == OP_CBRA || c == OP_CBRAPOS ||
150453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c == OP_ONCE || c == OP_ONCE_NC ||
150553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c == OP_COND || c == OP_SCOND)
1506f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
150753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    BOOL empty_branch;
150853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (GET(code, 1) == 0) goto ISTRUE;    /* Hit unclosed bracket */
1509f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
151053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* If a conditional group has only one branch, there is a second, implied,
151153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    empty branch, so just skip over the conditional, because it could be empty.
151253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    Otherwise, scan the individual branches of the group. */
1513f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
151453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
151553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      code += GET(code, 1);
151653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else
151753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
151853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      empty_branch = FALSE;
151953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      do
152053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
152153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (!empty_branch)
152253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
152353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          int rc = could_be_empty_branch(code, endcode, utf, cb, atend,
152453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            recurses, countptr);
152553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (rc < 0) return rc;
152653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (rc > 0) empty_branch = TRUE;
152753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
152853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        code += GET(code, 1);
152953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
153053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      while (*code == OP_ALT);
153153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (!empty_branch) goto ISFALSE;   /* All branches are non-empty */
1532f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
1533f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
153453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    c = *code;
153553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    continue;
153653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
1537f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
153853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Handle the other opcodes */
153953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
154053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  switch (c)
154153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
154253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Check for quantifiers after a class. XCLASS is used for classes that
154353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    cannot be represented just by a bit map. This includes negated single
154453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    high-valued characters. The length in PRIV(OP_lengths)[] is zero; the
154553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    actual length is stored in the compiled code, so we must update "code"
154653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    here. */
1547f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
154853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
154953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_XCLASS:
155053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    ccode = code += GET(code, 1);
155153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    goto CHECK_CLASS_REPEAT;
155253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
1553f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
155453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_CLASS:
155553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NCLASS:
155653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    ccode = code + PRIV(OP_lengths)[OP_CLASS];
155753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
155853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
155953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    CHECK_CLASS_REPEAT:
156053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
156153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
156253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    switch (*ccode)
1563f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
156453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_CRSTAR:            /* These could be empty; continue */
156553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_CRMINSTAR:
156653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_CRQUERY:
156753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_CRMINQUERY:
156853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_CRPOSSTAR:
156953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_CRPOSQUERY:
157053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
157153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
157253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      default:                   /* Non-repeat => class must match */
157353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_CRPLUS:            /* These repeats aren't empty */
157453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_CRMINPLUS:
157553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_CRPOSPLUS:
157653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      goto ISFALSE;
157753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
157853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_CRRANGE:
157953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_CRMINRANGE:
158053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_CRPOSRANGE:
158153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (GET2(ccode, 1) > 0) goto ISFALSE;  /* Minimum > 0 */
1582f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      break;
1583f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
158453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
158553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
158653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Opcodes that must match a character */
158753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
158853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ANY:
158953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ALLANY:
159053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ANYBYTE:
159153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
159253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_PROP:
159353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTPROP:
159453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ANYNL:
159553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
159653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOT_HSPACE:
159753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_HSPACE:
159853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOT_VSPACE:
159953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_VSPACE:
160053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_EXTUNI:
160153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
160253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOT_DIGIT:
160353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_DIGIT:
160453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOT_WHITESPACE:
160553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_WHITESPACE:
160653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOT_WORDCHAR:
160753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_WORDCHAR:
160853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
160953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_CHAR:
161053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_CHARI:
161153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOT:
161253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTI:
161353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
161453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_PLUS:
161553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_PLUSI:
161653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_MINPLUS:
161753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_MINPLUSI:
161853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
161953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTPLUS:
162053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTPLUSI:
162153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTMINPLUS:
162253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTMINPLUSI:
162353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
162453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_POSPLUS:
162553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_POSPLUSI:
162653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTPOSPLUS:
162753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTPOSPLUSI:
1628f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
162953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_EXACT:
163053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_EXACTI:
163153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTEXACT:
163253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTEXACTI:
1633f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
163453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TYPEPLUS:
163553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TYPEMINPLUS:
163653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TYPEPOSPLUS:
163753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TYPEEXACT:
163853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    goto ISFALSE;
1639f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
164053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* These are going to continue, as they may be empty, but we have to
164153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    fudge the length for the \p and \P cases. */
1642f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
164353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TYPESTAR:
164453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TYPEMINSTAR:
164553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TYPEPOSSTAR:
164653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TYPEQUERY:
164753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TYPEMINQUERY:
164853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TYPEPOSQUERY:
164953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
165053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
1651f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
165253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Same for these */
1653f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
165453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TYPEUPTO:
165553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TYPEMINUPTO:
165653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_TYPEPOSUPTO:
165753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
165853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      code += 2;
165953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
1660f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
166153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* End of branch */
1662f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
166353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_KET:
166453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_KETRMAX:
166553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_KETRMIN:
166653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_KETRPOS:
166753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_ALT:
166853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    goto ISTRUE;
1669f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
167053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* In UTF-8 or UTF-16 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY,
167153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    POSQUERY, UPTO, MINUPTO, and POSUPTO and their caseless and negative
167253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    versions may be followed by a multibyte character. */
1673f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
167453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef MAYBE_UTF_MULTI
167553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_STAR:
167653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_STARI:
167753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTSTAR:
167853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTSTARI:
1679f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
168053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_MINSTAR:
168153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_MINSTARI:
168253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTMINSTAR:
168353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTMINSTARI:
1684f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
168553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_POSSTAR:
168653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_POSSTARI:
168753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTPOSSTAR:
168853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTPOSSTARI:
1689f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
169053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_QUERY:
169153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_QUERYI:
169253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTQUERY:
169353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTQUERYI:
1694f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
169553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_MINQUERY:
169653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_MINQUERYI:
169753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTMINQUERY:
169853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTMINQUERYI:
1699f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
170053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_POSQUERY:
170153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_POSQUERYI:
170253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTPOSQUERY:
170353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTPOSQUERYI:
170453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
170553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
1706f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
170753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_UPTO:
170853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_UPTOI:
170953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTUPTO:
171053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTUPTOI:
1711f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
171253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_MINUPTO:
171353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_MINUPTOI:
171453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTMINUPTO:
171553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTMINUPTOI:
171653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
171753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_POSUPTO:
171853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_POSUPTOI:
171953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTPOSUPTO:
172053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_NOTPOSUPTOI:
172153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
172253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
172353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif  /* MAYBE_UTF_MULTI */
172453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
172553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* MARK, and PRUNE/SKIP/THEN with an argument must skip over the argument
172653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    string. */
172753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
172853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_MARK:
172953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_PRUNE_ARG:
173053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_SKIP_ARG:
173153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case OP_THEN_ARG:
173253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    code += code[1];
173353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
173453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
173553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* None of the remaining opcodes are required to match a character. */
173653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
173753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    default:
173853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
1739f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
1740f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
174153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
174253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisISTRUE:
174353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisgroupinfo |= GI_COULD_BE_EMPTY;
174453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
174553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisISFALSE:
174653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (group > 0) cb->groupinfo[group] = groupinfo | GI_SET_COULD_BE_EMPTY;
174753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
174853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn ((groupinfo & GI_COULD_BE_EMPTY) != 0)? CBE_EMPTY : CBE_NOTEMPTY;
1749f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
1750f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1751f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1752f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1753f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
175453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*            Check for counted repeat            *
1755f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
1756f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
175753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called when a '{' is encountered in a place where it might
175853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstart a quantifier. It looks ahead to see if it really is a quantifier, that
175953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisis, one of the forms {ddd} {ddd,} or {ddd,ddd} where the ddds are digits.
1760f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
176153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArgument:   pointer to the first char after '{'
176253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:    TRUE or FALSE
1763f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
1764f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1765f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic BOOL
176653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisis_counted_repeat(PCRE2_SPTR p)
1767f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
176853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (!IS_DIGIT(*p)) return FALSE;
176953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisp++;
177053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhile (IS_DIGIT(*p)) p++;
177153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
1772f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
177353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (*p++ != CHAR_COMMA) return FALSE;
177453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
1775f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
177653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (!IS_DIGIT(*p)) return FALSE;
177753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisp++;
177853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhile (IS_DIGIT(*p)) p++;
1779f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
178053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn (*p == CHAR_RIGHT_CURLY_BRACKET);
178153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis}
1782f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1783f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
1784f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
178553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/*************************************************
178653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*            Handle escapes                      *
178753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/
178853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
178953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called when a \ has been encountered. It either returns a
179053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispositive value for a simple escape such as \d, or 0 for a data character, which
179153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisis placed in chptr. A backreference to group n is returned as negative n. On
179253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisentry, ptr is pointing at the \. On exit, it points to the final code unit of the
179353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisescape sequence.
1794f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
179553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisThis function is also called from pcre2_substitute() to handle escape sequences
179653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisin replacement strings. In this case, the cb argument is NULL, and only
179753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissequences that define a data character are recognised. The isclass argument is
179853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnot relevant, but the options argument is the final value of the compiled
179953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispattern's options.
180053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
180153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisThere is one "trick" case: when a sequence such as [[:>:]] or \s in UCP mode is
180253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisprocessed, it is replaced by a nested alternative sequence. If this contains a
180353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbackslash (which it usually does), ptrend does not point to its end - it still
180453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispoints to the end of the whole pattern. However, we can detect this case
180553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbecause cb->nestptr[0] will be non-NULL. The nested sequences are all zero-
180653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisterminated and there are only ever two levels of nesting.
180753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
180853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments:
180953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ptrptr         points to the input position pointer
181053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ptrend         points to the end of the input
181153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  chptr          points to a returned data character
181253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  errorcodeptr   points to the errorcode variable (containing zero)
181353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  options        the current options bits
181453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  isclass        TRUE if inside a character class
181553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb             compile data block
181653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
181753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:         zero => a data character
181853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                 positive => a special escape sequence
181953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                 negative => a back reference
182053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                 on error, errorcodeptr is set non-zero
182153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/
1822f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
182353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint
182453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr,
182553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int *errorcodeptr, uint32_t options, BOOL isclass, compile_block *cb)
182653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{
182753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL utf = (options & PCRE2_UTF) != 0;
182853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR ptr = *ptrptr + 1;
182953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister uint32_t c, cc;
183053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint escape = 0;
183153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint i;
1832f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
183353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Find the end of a nested insert. */
1834f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
183553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (cb != NULL && cb->nestptr[0] != NULL)
183653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ptrend = ptr + PRIV(strlen)(ptr);
1837f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
183853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* If backslash is at the end of the string, it's an error. */
1839f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
184053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (ptr >= ptrend)
184153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
184253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  *errorcodeptr = ERR1;
184353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  return 0;
184453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
1845f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
184653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisGETCHARINCTEST(c, ptr);         /* Get character value, increment pointer */
184753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisptr--;                          /* Set pointer back to the last code unit */
1848f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
184953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Non-alphanumerics are literals, so we just leave the value in c. An initial
185053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisvalue test saves a memory lookup for code points outside the alphanumeric
185153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisrange. Otherwise, do a table lookup. A non-zero result is something that can be
185253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturned immediately. Otherwise further processing is required. */
1853f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
185453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (c < ESCAPES_FIRST || c > ESCAPES_LAST) {}  /* Definitely literal */
1855f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
185653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiselse if ((i = escapes[c - ESCAPES_FIRST]) != 0)
185753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
185853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (i > 0) c = (uint32_t)i; else  /* Positive is a data character */
1859f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
186053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    escape = -i;                    /* Else return a special escape */
186153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (escape == ESC_P || escape == ESC_p || escape == ESC_X)
186253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cb->external_flags |= PCRE2_HASBKPORX;   /* Note \P, \p, or \X */
1863f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
186453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
1865f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
186653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Escapes that need further processing, including those that are unknown.
186753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisWhen called from pcre2_substitute(), only \c, \o, and \x are recognized (and \u
186853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhen BSUX is set). */
1869f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
187053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiselse
187153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
187253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  PCRE2_SPTR oldptr;
187353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  BOOL braced, negated, overflow;
187453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  unsigned int s;
187553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
187653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Filter calls from pcre2_substitute(). */
187753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
187853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (cb == NULL && c != CHAR_c && c != CHAR_o && c != CHAR_x &&
187953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      (c != CHAR_u || (options & PCRE2_ALT_BSUX) != 0))
1880f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
188153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *errorcodeptr = ERR3;
188253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    return 0;
1883f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
1884f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
188553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  switch (c)
1886f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
188753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* A number of Perl escapes are not handled by PCRE. We give an explicit
188853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    error. */
1889f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
189053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_l:
189153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_L:
189253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *errorcodeptr = ERR37;
189353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
1894f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
189553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* \u is unrecognized when PCRE2_ALT_BSUX is not set. When it is treated
189653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    specially, \u must be followed by four hex digits. Otherwise it is a
189753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    lowercase u letter. */
189853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
189953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_u:
190053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if ((options & PCRE2_ALT_BSUX) == 0) *errorcodeptr = ERR37; else
1901f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
190253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      uint32_t xc;
190353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if ((cc = XDIGIT(ptr[1])) == 0xff) break;  /* Not a hex digit */
190453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if ((xc = XDIGIT(ptr[2])) == 0xff) break;  /* Not a hex digit */
190553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cc = (cc << 4) | xc;
190653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if ((xc = XDIGIT(ptr[3])) == 0xff) break;  /* Not a hex digit */
190753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cc = (cc << 4) | xc;
190853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if ((xc = XDIGIT(ptr[4])) == 0xff) break;  /* Not a hex digit */
190953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c = (cc << 4) | xc;
191053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr += 4;
191153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (utf)
1912f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
191353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (c > 0x10ffffU) *errorcodeptr = ERR77;
191453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          else if (c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
1915f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
191653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77;
1917f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
191853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
1919f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
192053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_U:
192153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* \U is unrecognized unless PCRE2_ALT_BSUX is set, in which case it is an
192253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    upper case letter. */
192353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if ((options & PCRE2_ALT_BSUX) == 0) *errorcodeptr = ERR37;
192453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
1925f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
192653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* In a character class, \g is just a literal "g". Outside a character
192753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    class, \g must be followed by one of a number of specific things:
1928f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
192953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    (1) A number, either plain or braced. If positive, it is an absolute
193053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    backreference. If negative, it is a relative backreference. This is a Perl
193153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    5.10 feature.
1932f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
193353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    (2) Perl 5.10 also supports \g{name} as a reference to a named group. This
193453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    is part of Perl's movement towards a unified syntax for back references. As
193553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    this is synonymous with \k{name}, we fudge it up by pretending it really
193653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    was \k.
1937f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
193853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    (3) For Oniguruma compatibility we also support \g followed by a name or a
193953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    number either in angle brackets or in single quotes. However, these are
194053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    (possibly recursive) subroutine calls, _not_ backreferences. Just return
194153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    the ESC_g code (cf \k). */
1942f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
194353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_g:
194453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (isclass) break;
194553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
194653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
194753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      escape = ESC_g;
194853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
194953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
1950f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
195153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Handle the Perl-compatible cases */
195253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
195353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
1954f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
195553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_SPTR p;
195653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      for (p = ptr+2; *p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
195753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break;
195853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (*p != CHAR_NULL && *p != CHAR_RIGHT_CURLY_BRACKET)
195953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
196053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        escape = ESC_k;
196153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
196253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
196353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      braced = TRUE;
196453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr++;
196553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
196653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else braced = FALSE;
196753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
196853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (ptr[1] == CHAR_MINUS)
196953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
197053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      negated = TRUE;
197153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr++;
197253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
197353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else negated = FALSE;
197453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
197553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* The integer range is limited by the machine's int representation. */
197653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    s = 0;
197753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    overflow = FALSE;
197853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    while (IS_DIGIT(ptr[1]))
197953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
198053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (s > INT_MAX / 10 - 1) /* Integer overflow */
198153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
198253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        overflow = TRUE;
198353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
198453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
19858b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      s = s * 10 + (unsigned int)(*(++ptr) - CHAR_0);
198653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
198753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (overflow) /* Integer overflow */
198853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
198953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      while (IS_DIGIT(ptr[1])) ptr++;
199053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *errorcodeptr = ERR61;
1991f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      break;
199253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
1993f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
199453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET)
199553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
199653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *errorcodeptr = ERR57;
199753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
199853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
1999f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
200053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (s == 0)
200153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
200253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *errorcodeptr = ERR58;
2003f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      break;
2004f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
2005f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
200653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (negated)
200753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
200853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (s > cb->bracount)
200953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
201053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        *errorcodeptr = ERR15;
201153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
201253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
201353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      s = cb->bracount - (s - 1);
201453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
2015f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
201653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    escape = -(int)s;
201753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
2018f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
201953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* The handling of escape sequences consisting of a string of digits
202053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    starting with one that is not zero is not straightforward. Perl has changed
202153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    over the years. Nowadays \g{} for backreferences and \o{} for octal are
202253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    recommended to avoid the ambiguities in the old syntax.
2023f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
202453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    Outside a character class, the digits are read as a decimal number. If the
202553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    number is less than 10, or if there are that many previous extracting left
202653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    brackets, it is a back reference. Otherwise, up to three octal digits are
202753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    read to form an escaped character code. Thus \123 is likely to be octal 123
202853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    (cf \0123, which is octal 012 followed by the literal 3).
2029f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
203053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    Inside a character class, \ followed by a digit is always either a literal
203153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    8 or 9 or an octal number. */
2032f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
203353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
203453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
2035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
203653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (!isclass)
203753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
203853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      oldptr = ptr;
203953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* The integer range is limited by the machine's int representation. */
204053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      s = c - CHAR_0;
204153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      overflow = FALSE;
204253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      while (IS_DIGIT(ptr[1]))
204353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
204453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (s > INT_MAX / 10 - 1) /* Integer overflow */
204553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
204653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          overflow = TRUE;
204753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          break;
204853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
20498b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis        s = s * 10 + (unsigned int)(*(++ptr) - CHAR_0);
205053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
205153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (overflow) /* Integer overflow */
205253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
205353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        while (IS_DIGIT(ptr[1])) ptr++;
205453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        *errorcodeptr = ERR61;
205553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
205653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
2057f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
205853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x
205953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      are octal escapes if there are not that many previous captures. */
2060f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
206153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (s < 10 || *oldptr >= CHAR_8 || s <= cb->bracount)
206253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
206353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        escape = -(int)s;     /* Indicates a back reference */
206453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
206553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
206653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr = oldptr;      /* Put the pointer back and fall through */
206753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
2068f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
206953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Handle a digit following \ when the number is not a back reference, or
207053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    we are within a character class. If the first digit is 8 or 9, Perl used to
207153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    generate a binary zero byte and then treat the digit as a following
207253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    literal. At least by Perl 5.18 this changed so as not to insert the binary
207353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zero. */
2074f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
207553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if ((c = *ptr) >= CHAR_8) break;
2076f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
207753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Fall through with a digit less than 8 */
2078f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
207953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* \0 always starts an octal number, but we may drop through to here with a
208053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    larger first octal digit. The original code used just to take the least
208153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    significant 8 bits of octal numbers (I think this is what early Perls used
208253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode,
208353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    but no more than 3 octal digits. */
2084f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
208553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_0:
208653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    c -= CHAR_0;
208753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
208853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        c = c * 8 + *(++ptr) - CHAR_0;
208953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 8
209053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (!utf && c > 0xff) *errorcodeptr = ERR51;
209153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
2092f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
2093f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
209453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* \o is a relatively new Perl feature, supporting a more general way of
209553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    specifying character codes in octal. The only supported form is \o{ddd}. */
2096f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
209753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_o:
209853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (ptr[1] != CHAR_LEFT_CURLY_BRACKET) *errorcodeptr = ERR55; else
209953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (ptr[2] == CHAR_RIGHT_CURLY_BRACKET) *errorcodeptr = ERR78; else
210053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
210153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr += 2;
210253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c = 0;
210353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      overflow = FALSE;
210453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      while (*ptr >= CHAR_0 && *ptr <= CHAR_7)
210553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
210653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cc = *ptr++;
210753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
210853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 32
210953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (c >= 0x20000000l) { overflow = TRUE; break; }
211053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
211153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        c = (c << 3) + (cc - CHAR_0);
211253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 8
211353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
211453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#elif PCRE2_CODE_UNIT_WIDTH == 16
211553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (c > (utf ? 0x10ffffU : 0xffffU)) { overflow = TRUE; break; }
211653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#elif PCRE2_CODE_UNIT_WIDTH == 32
211753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (utf && c > 0x10ffffU) { overflow = TRUE; break; }
211853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
211953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
212053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (overflow)
212153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
212253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        while (*ptr >= CHAR_0 && *ptr <= CHAR_7) ptr++;
212353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        *errorcodeptr = ERR34;
212453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
212553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
212653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
212753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
212853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
212953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else *errorcodeptr = ERR64;
213053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
2131f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
2132f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
213353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* \x is complicated. When PCRE2_ALT_BSUX is set, \x must be followed by
213453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    two hexadecimal digits. Otherwise it is a lowercase x letter. */
2135f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
213653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_x:
213753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if ((options & PCRE2_ALT_BSUX) != 0)
213853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
213953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      uint32_t xc;
214053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if ((cc = XDIGIT(ptr[1])) == 0xff) break;  /* Not a hex digit */
214153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if ((xc = XDIGIT(ptr[2])) == 0xff) break;  /* Not a hex digit */
214253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c = (cc << 4) | xc;
214353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr += 2;
214453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }    /* End PCRE2_ALT_BSUX handling */
2145f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
214653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Handle \x in Perl's style. \x{ddd} is a character number which can be
214753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    greater than 0xff in UTF-8 or non-8bit mode, but only if the ddd are hex
214853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    digits. If not, { used to be treated as a data character. However, Perl
214953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    seems to read hex digits up to the first non-such, and ignore the rest, so
215053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    that, for example \x{zz} matches a binary zero. This seems crazy, so PCRE
215153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    now gives an error. */
2152f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
215353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else
215453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
215553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
215653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
215753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr += 2;
215853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
215953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
216053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          *errorcodeptr = ERR78;
216153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          break;
216253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
216353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        c = 0;
216453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        overflow = FALSE;
2165f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
216653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        while ((cc = XDIGIT(*ptr)) != 0xff)
216753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
216853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr++;
216953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (c == 0 && cc == 0) continue;   /* Leading zeroes */
217053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 32
217153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (c >= 0x10000000l) { overflow = TRUE; break; }
217253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
217353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          c = (c << 4) | cc;
217453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if ((utf && c > 0x10ffffU) || (!utf && c > MAX_NON_UTF_CHAR))
217553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
217653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            overflow = TRUE;
217753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
217853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
217953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
2180f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
218153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (overflow)
218253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
218353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          while (XDIGIT(*ptr) != 0xff) ptr++;
218453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          *errorcodeptr = ERR34;
218553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
218653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
218753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
218853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
218953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
2190f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
219153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* If the sequence of hex digits does not end with '}', give an error.
219253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        We used just to recognize this construct and fall through to the normal
219353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        \x handling, but nowadays Perl gives an error, which seems much more
219453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        sensible, so we do too. */
2195f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
219653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else *errorcodeptr = ERR67;
219753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }   /* End of \x{} processing */
2198f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
219953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Read a single-byte hex-defined char (up to two hex digits after \x) */
2200f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
220153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else
220253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
220353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        c = 0;
220453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if ((cc = XDIGIT(ptr[1])) == 0xff) break;  /* Not a hex digit */
220553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr++;
220653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        c = cc;
220753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if ((cc = XDIGIT(ptr[1])) == 0xff) break;  /* Not a hex digit */
220853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr++;
220953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        c = (c << 4) | cc;
221053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }     /* End of \xdd handling */
221153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }       /* End of Perl-style \x handling */
2212f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
2213f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
221453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* The handling of \c is different in ASCII and EBCDIC environments. In an
221553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    ASCII (or Unicode) environment, an error is given if the character
221653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    following \c is not a printable ASCII character. Otherwise, the following
221753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    character is upper-cased if it is a letter, and after that the 0x40 bit is
221853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    flipped. The result is the value of the escape.
2219f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
    In an EBCDIC environment the handling of \c is compatible with the
    specification in the perlebcdic document. The following character must be
    a letter or one of a small number of special characters. These provide a
    means of defining the character values 0-31.
2224f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
222553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    For testing the EBCDIC handling of \c in an ASCII environment, recognize
222653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    the EBCDIC value of 'c' explicitly. */
2227f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
222853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if defined EBCDIC && 'a' != 0x81
222953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case 0x83:
223053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else
223153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_c:
2232f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
2233f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
223453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    c = *(++ptr);
223553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (c >= CHAR_a && c <= CHAR_z) c = UPPER_CASE(c);
223653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (c == CHAR_NULL && ptr >= ptrend)
223753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
223853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *errorcodeptr = ERR2;
223953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
224053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
2241f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
224253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Handle \c in an ASCII/Unicode environment. */
2243f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
224453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifndef EBCDIC    /* ASCII/UTF-8 coding */
224553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (c < 32 || c > 126)  /* Excludes all non-printable ASCII */
224653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
224753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *errorcodeptr = ERR68;
224853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
224953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
225053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    c ^= 0x40;
2251f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
    /* Handle \c in an EBCDIC environment. The special case \c? is converted to
    255 (0xff) or 95 (0x5f) if other characters suggest we are using the
    POSIX-BC encoding. (This is the way Perl indicates that it handles \c?.)
    The other valid sequences correspond to a list of specific characters. */
2256f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
225753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else
225853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (c == CHAR_QUESTION_MARK)
225953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c = ('\\' == 188 && '`' == 74)? 0x5f : 0xff;
226053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else
226153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
226253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      for (i = 0; i < 32; i++)
226353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
226453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (c == ebcdic_escape_c[i]) break;
226553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
226653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (i < 32) c = i; else *errorcodeptr = ERR68;
226753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
226853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif  /* EBCDIC */
2269f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
227053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
2271f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
227253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Any other alphanumeric following \ is an error. Perl gives an error only
227353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if in warning mode, but PCRE doesn't have a warning mode. */
2274f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
227553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    default:
227653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *errorcodeptr = ERR3;
227753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
227853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
2279f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
2280f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
228153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Perl supports \N{name} for character names, as well as plain \N for "not
228253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnewline". PCRE does not support \N{name}. However, it does support
228353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisquantification such as \N{2,3}. */
2284f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
228553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (escape == ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET &&
228653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     !is_counted_repeat(ptr+2))
228753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  *errorcodeptr = ERR37;
2288f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
228953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* If PCRE2_UCP is set, we change the values for \d etc. */
2290f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
229153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((options & PCRE2_UCP) != 0 && escape >= ESC_D && escape <= ESC_w)
229253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  escape += (ESC_DU - ESC_D);
2293f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
229453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Set the pointer to the final character before returning. */
2295f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
229653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*ptrptr = ptr;
229753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*chptr = c;
229853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn escape;
2299f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
2300f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2301f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2302f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
230353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
2304f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
230553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*               Handle \P and \p                 *
2306f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
2307f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
230853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called after \P or \p has been encountered, provided that
230953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2 is compiled with support for UTF and Unicode properties. On entry, the
231053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscontents of ptrptr are pointing at the P or p. On exit, it is left pointing at
231153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe final code unit of the escape sequence.
2312f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2313f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments:
231453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ptrptr         the pattern position pointer
231553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  negptr         a boolean that is set TRUE for negation else FALSE
231653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ptypeptr       an unsigned int that is set to the type value
231753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  pdataptr       an unsigned int that is set to the detailed property value
231853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  errorcodeptr   the error code variable
231953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb             the compile data
2320f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
232153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:         TRUE if the type value was found, or FALSE for an invalid type
2322f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
2323f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2324f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic BOOL
232553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisget_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, unsigned int *ptypeptr,
232653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  unsigned int *pdataptr, int *errorcodeptr, compile_block *cb)
2327f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
232853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister PCRE2_UCHAR c;
23298b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskissize_t i, bot, top;
233053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR ptr = *ptrptr;
233153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR name[32];
2332f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
233353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*negptr = FALSE;
233453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisc = *(++ptr);
2335f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
233653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* \P or \p can be followed by a name in {}, optionally preceded by ^ for
233753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnegation. */
2338f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
233953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (c == CHAR_LEFT_CURLY_BRACKET)
234053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
234153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
234253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
234353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *negptr = TRUE;
234453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    ptr++;
234553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
234653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++)
234753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
234853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    c = *(++ptr);
234953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (c == CHAR_NULL) goto ERROR_RETURN;
235053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (c == CHAR_RIGHT_CURLY_BRACKET) break;
235153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    name[i] = c;
235253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
235353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
235453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  name[i] = 0;
235553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
2356f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
235753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Otherwise there is just one following character, which must be an ASCII
235853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisletter. */
2359f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
236053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiselse if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0)
236153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
236253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  name[0] = c;
236353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  name[1] = 0;
236453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
236553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiselse goto ERROR_RETURN;
2366f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
236753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*ptrptr = ptr;
2368f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
236953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Search for a recognized property name using binary chop. */
2370f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
237153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbot = 0;
237253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskistop = PRIV(utt_size);
2373f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
237453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhile (bot < top)
237553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
237653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int r;
237753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  i = (bot + top) >> 1;
237853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  r = PRIV(strcmp_c8)(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
237953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (r == 0)
2380f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
238153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *ptypeptr = PRIV(utt)[i].type;
238253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *pdataptr = PRIV(utt)[i].value;
238353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    return TRUE;
2384f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
238553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (r > 0) bot = i + 1; else top = i;
2386f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
238753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*errorcodeptr = ERR47;   /* Unrecognized name */
238853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn FALSE;
2389f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
239053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisERROR_RETURN:            /* Malformed \P or \p */
239153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*errorcodeptr = ERR46;
239253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*ptrptr = ptr;
2393f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichreturn FALSE;
2394f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
239553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
2396f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2397f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2398f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2399f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
240053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*         Read repeat counts                     *
2401f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
2402f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
240353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Read an item of the form {n,m} and return the values. This is called only
240453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisafter is_counted_repeat() has confirmed that a repeat-count quantifier exists,
240553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisso the syntax is guaranteed to be correct, but we need to check the values.
2406f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2407f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments:
240853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  p              pointer to first char after '{'
240953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  minp           pointer to int for min
241053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  maxp           pointer to int for max
241153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                 returned as -1 if no max
241253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  errorcodeptr   points to error code variable
241353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
241453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:         pointer to '}' on success;
241553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                 current ptr on error, with errorcodeptr set non-zero
2416f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
2417f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
241853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic PCRE2_SPTR
241953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisread_repeat_counts(PCRE2_SPTR p, int *minp, int *maxp, int *errorcodeptr)
2420f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
242153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint min = 0;
242253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint max = -1;
2423f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
242453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhile (IS_DIGIT(*p))
2425f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
242653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  min = min * 10 + (int)(*p++ - CHAR_0);
242753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (min > 65535)
2428f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
242953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *errorcodeptr = ERR5;
243053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    return p;
2431f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
2432f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
2433f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
243453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
2435f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
243653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
2437f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
243853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    max = 0;
243953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    while(IS_DIGIT(*p))
244053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
244153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      max = max * 10 + (int)(*p++ - CHAR_0);
244253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (max > 65535)
244353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
244453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        *errorcodeptr = ERR5;
244553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        return p;
244653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
244753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
244853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (max < min)
244953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
245053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *errorcodeptr = ERR4;
245153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      return p;
245253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
2453f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
2454f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
245553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
245653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*minp = min;
245753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*maxp = max;
245853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn p;
2459f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
2460f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2461f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2462f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2463f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
246453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*   Scan compiled regex for recursion reference  *
2465f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
2466f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
246753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function scans through a compiled pattern until it finds an instance of
246853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisOP_RECURSE.
2469f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2470f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments:
247153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  code        points to start of expression
247253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  utf         TRUE in UTF mode
2473f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
247453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:      pointer to the opcode for OP_RECURSE, or NULL if not found
2475f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
2476f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
247753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic PCRE2_SPTR
247853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfind_recurse(PCRE2_SPTR code, BOOL utf)
2479f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
248053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (;;)
2481f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
248253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  register PCRE2_UCHAR c = *code;
248353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (c == OP_END) return NULL;
248453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (c == OP_RECURSE) return code;
2485f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
248653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* XCLASS is used for classes that cannot be represented just by a bit map.
248753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  This includes negated single high-valued characters. CALLOUT_STR is used for
248853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  callouts with string arguments. In both cases the length in the table is
248953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  zero; the actual length is stored in the compiled code. */
2490f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
249153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (c == OP_XCLASS) code += GET(code, 1);
249253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
2493f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
249453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Otherwise, we can get the item's length from the table, except that for
249553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  repeated character types, we have to test for \p and \P, which have an extra
249653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
249753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  must add in its length. */
2498f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
249953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  else
2500f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
250153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    switch(c)
250253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
250353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_TYPESTAR:
250453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_TYPEMINSTAR:
250553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_TYPEPLUS:
250653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_TYPEMINPLUS:
250753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_TYPEQUERY:
250853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_TYPEMINQUERY:
250953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_TYPEPOSSTAR:
251053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_TYPEPOSPLUS:
251153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_TYPEPOSQUERY:
251253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
251353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
2514f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
251553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_TYPEPOSUPTO:
251653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_TYPEUPTO:
251753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_TYPEMINUPTO:
251853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_TYPEEXACT:
251953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
252053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        code += 2;
252153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
2522f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
252353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_MARK:
252453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_PRUNE_ARG:
252553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_SKIP_ARG:
252653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_THEN_ARG:
252753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      code += code[1];
252853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
2529f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
2530f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
253153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Add in the fixed length from the table */
2532f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2533f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    code += PRIV(OP_lengths)[c];
2534f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
253553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may
253653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    be followed by a multi-unit character. The length in the table is a
253753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    minimum, so we have to arrange to skip the extra units. */
253853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
253953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef MAYBE_UTF_MULTI
254053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (utf) switch(c)
2541f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
254253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_CHAR:
254353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_CHARI:
254453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOT:
254553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTI:
254653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_EXACT:
254753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_EXACTI:
254853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTEXACT:
254953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTEXACTI:
255053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_UPTO:
255153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_UPTOI:
255253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTUPTO:
255353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTUPTOI:
255453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_MINUPTO:
255553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_MINUPTOI:
255653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTMINUPTO:
255753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTMINUPTOI:
255853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_POSUPTO:
255953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_POSUPTOI:
256053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTPOSUPTO:
256153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTPOSUPTOI:
256253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_STAR:
256353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_STARI:
256453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTSTAR:
256553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTSTARI:
256653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_MINSTAR:
256753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_MINSTARI:
256853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTMINSTAR:
256953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTMINSTARI:
257053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_POSSTAR:
257153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_POSSTARI:
257253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTPOSSTAR:
257353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTPOSSTARI:
257453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_PLUS:
257553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_PLUSI:
257653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTPLUS:
257753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTPLUSI:
257853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_MINPLUS:
257953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_MINPLUSI:
258053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTMINPLUS:
258153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTMINPLUSI:
258253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_POSPLUS:
258353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_POSPLUSI:
258453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTPOSPLUS:
258553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTPOSPLUSI:
258653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_QUERY:
258753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_QUERYI:
258853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTQUERY:
258953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTQUERYI:
259053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_MINQUERY:
259153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_MINQUERYI:
259253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTMINQUERY:
259353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTMINQUERYI:
259453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_POSQUERY:
259553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_POSQUERYI:
259653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTPOSQUERY:
259753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case OP_NOTPOSQUERYI:
259853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
259953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
2600f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
260153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else
260253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    (void)(utf);  /* Keep compiler happy by referencing function argument */
260353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif  /* MAYBE_UTF_MULTI */
260453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
260553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
260653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis}
2607f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2608f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2609f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
261053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/*************************************************
261153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*           Check for POSIX class syntax         *
261253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/
261353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
261453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called when the sequence "[:" or "[." or "[=" is
261553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisencountered in a character class. It checks whether this is followed by a
261653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissequence of characters terminated by a matching ":]" or ".]" or "=]". If we
261753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreach an unescaped ']' without the special preceding character, return FALSE.
2618f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
261953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisOriginally, this function only recognized a sequence of letters between the
262053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisterminators, but it seems that Perl recognizes any sequence of characters,
262153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthough of course unknown POSIX names are subsequently rejected. Perl gives an
262253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis"Unknown POSIX class" error for [:f\oo:] for example, where previously PCRE
262353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisdidn't consider this to be a POSIX class. Likewise for [:1234:].
2624f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
262553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisThe problem in trying to be exactly like Perl is in the handling of escapes. We
262653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskishave to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
262753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisclass, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
262853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbelow handles the special cases \\ and \], but does not try to do any other
262953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisescape processing. This makes it different from Perl for cases such as
263053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis[:l\ower:] where Perl recognizes it as the POSIX class "lower" but PCRE does
263153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnot recognize "l\ower". This is a lesser evil than not diagnosing bad classes
263253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhen Perl does, I think.
2633f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
263453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisA user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
263553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisIt seems that the appearance of a nested POSIX class supersedes an apparent
263653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisexternal class. For example, [:a[:digit:]b:] matches "a", "b", ":", or
263753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisa digit. This is handled by returning FALSE if the start of a new group with
263853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe same terminator is encountered, since the next closing sequence must close
263953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe nested group, not the outer one.
2640f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
264153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisIn Perl, unescaped square brackets may also appear as part of class names. For
264253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisexample, [:a[:abc]b:] gives unknown POSIX class "[:abc]b:]". However, for
264353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis[:a[:abc]b][b:] it gives unknown POSIX class "[:abc]b][b:]", which does not
264453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisseem right at all. PCRE does not allow closing square brackets in POSIX class
264553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnames.
2646f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
264753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments:
264853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ptr      pointer to the initial [
264953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  endptr   where to return a pointer to the terminating ':', '.', or '='
2650f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
265153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:   TRUE or FALSE
265253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/
2653f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
265453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic BOOL
265553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskischeck_posix_syntax(PCRE2_SPTR ptr, PCRE2_SPTR *endptr)
265653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{
265753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR terminator;  /* Don't combine these lines; the Solaris cc */
265853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisterminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
2659f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
266053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (++ptr; *ptr != CHAR_NULL; ptr++)
266153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
266253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (*ptr == CHAR_BACKSLASH &&
266353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET || ptr[1] == CHAR_BACKSLASH))
266453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    ptr++;
266553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  else if ((*ptr == CHAR_LEFT_SQUARE_BRACKET && ptr[1] == terminator) ||
266653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
266753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  else if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
2668f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
266953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *endptr = ptr;
267053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    return TRUE;
2671f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
267253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
2673f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
267453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn FALSE;
267553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis}
2676f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2677f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2678f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
267953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/*************************************************
268053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*          Check POSIX class name                *
268153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/
2682f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
268353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called to check the name given in a POSIX-style class entry
268453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissuch as [:alnum:].
2685f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
268653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments:
268753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ptr        points to the first letter
268853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  len        the length of the name
2689f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
269053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:     a value representing the name, or -1 if unknown
269153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/
2692f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
269353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic int
269453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskischeck_posix_name(PCRE2_SPTR ptr, int len)
269553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{
269653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisconst char *pn = posix_names;
269753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister int yield = 0;
269853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhile (posix_name_lengths[yield] != 0)
269953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
270053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (len == posix_name_lengths[yield] &&
270153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    PRIV(strncmp_c8)(ptr, pn, (unsigned int)len) == 0) return yield;
270253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  pn += posix_name_lengths[yield] + 1;
270353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  yield++;
270453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
270553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn -1;
270653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis}
2707f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2708f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2709f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
#ifdef SUPPORT_UNICODE
/*************************************************
*           Get othercase range                  *
*************************************************/

/* Given the start and end of a class range, this function scans upwards
through the characters looking for runs of characters in the "other" case.
Each call hands back the next such run and updates the start value, so the
caller can call repeatedly until the input range is exhausted. A character
that has multiple other cases is returned on its own, with a special return
value.

Arguments:
  cptr        points to starting character value; updated
  d           end value
  ocptr       where to put start of othercase range
  odptr       where to put end of othercase range

Yield:        -1 when no more (nothing is written through the pointers)
               0 when a range is returned
              >0 the CASESET offset for char with multiple other cases
                in this case, ocptr contains the original
*/

static int
get_othercase_range(uint32_t *cptr, uint32_t d, uint32_t *ocptr,
  uint32_t *odptr)
{
uint32_t ch = *cptr;
uint32_t oc = 0;
uint32_t expected;
unsigned int caseset;

/* Skip characters that have no other case. Stop at the first one that does;
if that character belongs to a case set, hand it back immediately together
with its case set offset. */

while (ch <= d)
  {
  caseset = UCD_CASESET(ch);
  if (caseset != 0)
    {
    *ocptr = ch;        /* Character that has the set */
    *cptr = ch + 1;     /* Rest of input range */
    return (int)caseset;
    }
  oc = UCD_OTHERCASE(ch);
  if (oc != ch) break;
  ch++;
  }

if (ch > d) return -1;  /* Reached end of range */

/* ch has exactly one other case. Extend the run for as long as successive
characters map to successive other-case values; it ends at the end of the
input range or at a character that has a case set or whose other case does
not continue the sequence. */

*ocptr = oc;
expected = oc + 1;

for (ch++; ch <= d; ch++, expected++)
  {
  caseset = UCD_CASESET(ch);
  if (caseset != 0) break;
  if (UCD_OTHERCASE(ch) != expected) break;
  }

*odptr = expected - 1;  /* End of othercase range */
*cptr = ch;             /* Rest of input range */
return 0;
}
#endif  /* SUPPORT_UNICODE */
2773f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2774f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2775f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
277653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/*************************************************
277753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*        Add a character or range to a class     *
277853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/
2779f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
278053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function packages up the logic of adding a character or range of
278153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskischaracters to a class. The character values in the arguments will be within the
278253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisvalid values for the current mode (8-bit, 16-bit, UTF, etc). This function is
278353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismutually recursive with the function immediately below.
2784f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
278553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments:
278653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  classbits     the bit map for characters < 256
278753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  uchardptr     points to the pointer for extra data
278853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  options       the options word
278953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb            compile data
279053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  start         start of range character
279153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  end           end of range character
2792f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
279353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:        the number of < 256 characters added
279453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                the pointer to extra data is updated
279553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/
2796f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
27978b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisstatic unsigned int
279853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisadd_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options,
279953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  compile_block *cb, uint32_t start, uint32_t end)
280053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{
280153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t c;
280253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t classbits_end = (end <= 0xff ? end : 0xff);
28038b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisunsigned int n8 = 0;
2804f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
280553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* If caseless matching is required, scan the range and process alternate
280653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscases. In Unicode, there are 8-bit characters that have alternate cases that
280753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisare greater than 255 and vice-versa. Sometimes we can just extend the original
280853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisrange. */
2809f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
281053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((options & PCRE2_CASELESS) != 0)
281153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
281253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
281353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if ((options & PCRE2_UTF) != 0)
281453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
281553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    int rc;
281653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    uint32_t oc, od;
2817f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
281853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    options &= ~PCRE2_CASELESS;   /* Remove for recursive calls */
281953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    c = start;
2820f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
282153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0)
282253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
282353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Handle a single character that has more than one other case. */
2824f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
282553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (rc > 0) n8 += add_list_to_class(classbits, uchardptr, options, cb,
282653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        PRIV(ucd_caseless_sets) + rc, oc);
2827f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
282853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Do nothing if the other case range is within the original range. */
2829f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
283053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else if (oc >= start && od <= end) continue;
2831f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
283253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Extend the original range if there is overlap, noting that if oc < c, we
283353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      can't have od > end because a subrange is always shorter than the basic
283453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      range. Otherwise, use a recursive call to add the additional range. */
2835f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
283653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */
283753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else if (od > end && oc <= end + 1)
2838f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
283953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        end = od;       /* Extend upwards */
284053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff);
2841f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
284253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else n8 += add_to_class(classbits, uchardptr, options, cb, oc, od);
284353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
284453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
284553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  else
284653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif  /* SUPPORT_UNICODE */
2847f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
284853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Not UTF mode */
2849f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
285053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  for (c = start; c <= classbits_end; c++)
285153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
285253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    SETBIT(classbits, cb->fcc[c]);
285353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    n8++;
285453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
285553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
2856f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
285753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Now handle the original range. Adjust the final value according to the bit
285853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskislength - this means that the same lists of (e.g.) horizontal spaces can be used
285953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisin all cases. */
2860f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
286153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((options & PCRE2_UTF) == 0 && end > MAX_NON_UTF_CHAR)
286253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  end = MAX_NON_UTF_CHAR;
2863f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
286453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Use the bitmap for characters < 256. Otherwise use extra data.*/
2865f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
286653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (c = start; c <= classbits_end; c++)
286753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
286853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Regardless of start, c will always be <= 255. */
286953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  SETBIT(classbits, c);
287053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  n8++;
287153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
2872f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
287353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS
287453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (start <= 0xff) start = 0xff + 1;
2875f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
287653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (end >= start)
287753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
287853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  PCRE2_UCHAR *uchardata = *uchardptr;
2879f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
288053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
288153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if ((options & PCRE2_UTF) != 0)
288253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
288353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (start < end)
288453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
288553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *uchardata++ = XCL_RANGE;
288653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      uchardata += PRIV(ord2utf)(start, uchardata);
288753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      uchardata += PRIV(ord2utf)(end, uchardata);
288853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
288953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else if (start == end)
289053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
289153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *uchardata++ = XCL_SINGLE;
289253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      uchardata += PRIV(ord2utf)(start, uchardata);
2893f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
2894f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
289553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  else
289653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif  /* SUPPORT_UNICODE */
2897f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
289853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Without UTF support, character values are constrained by the bit length,
289953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  and can only be > 256 for 16-bit and 32-bit libraries. */
2900f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
290153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 8
290253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {}
290353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else
290453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (start < end)
290553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
290653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *uchardata++ = XCL_RANGE;
290753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *uchardata++ = start;
290853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *uchardata++ = end;
290953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
291053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  else if (start == end)
291153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
291253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *uchardata++ = XCL_SINGLE;
291353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *uchardata++ = start;
291453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
291553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
291653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  *uchardptr = uchardata;   /* Updata extra data pointer */
2917f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
291853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else
291953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  (void)uchardptr;          /* Avoid compiler warning */
292053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif /* SUPPORT_WIDE_CHARS */
2921f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
292253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn n8;    /* Number of 8-bit characters */
2923f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
2924f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2925f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2926f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2927f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
292853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*        Add a list of characters to a class     *
2929f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
2930f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
293153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is used for adding a list of case-equivalent characters to a
293253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisclass, and also for adding a list of horizontal or vertical whitespace. If the
293353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskislist is in order (which it should be), ranges of characters are detected and
293453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskishandled appropriately. This function is mutually recursive with the function
293553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisabove.
2936f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
2937f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments:
293853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  classbits     the bit map for characters < 256
293953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  uchardptr     points to the pointer for extra data
294053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  options       the options word
294153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb            contains pointers to tables etc.
294253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  p             points to row of 32-bit values, terminated by NOTACHAR
294353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  except        character to omit; this is used when adding lists of
294453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                  case-equivalent characters to avoid including the one we
294553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                  already know about
2946f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
294753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:        the number of < 256 characters added
294853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                the pointer to extra data is updated
2949f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
2950f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
29518b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisstatic unsigned int
295253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisadd_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options,
295353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  compile_block *cb, const uint32_t *p, unsigned int except)
2954f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
29558b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisunsigned int n8 = 0;
295653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhile (p[0] < NOTACHAR)
2957f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
29588b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis  unsigned int n = 0;
295953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (p[0] != except)
296053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
296153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    while(p[n+1] == p[0] + n + 1) n++;
296253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    n8 += add_to_class(classbits, uchardptr, options, cb, p[0], p[n]);
296353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
296453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  p += n + 1;
296553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
296653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn n8;
296753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis}
29688366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes
29698366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes
2970f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
297153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/*************************************************
297253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*    Add characters not in a list to a class     *
297353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/
2974f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
297553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is used for adding the complement of a list of horizontal or
297653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisvertical whitespace to a class. The list must be in order.
2977f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
297853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments:
297953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  classbits     the bit map for characters < 256
298053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  uchardptr     points to the pointer for extra data
298153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  options       the options word
298253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb            contains pointers to tables etc.
298353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  p             points to row of 32-bit values, terminated by NOTACHAR
2984f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
298553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:        the number of < 256 characters added
298653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                the pointer to extra data is updated
298753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/
2988f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
29898b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisstatic unsigned int
299053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisadd_not_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
299153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  uint32_t options, compile_block *cb, const uint32_t *p)
299253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{
299353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL utf = (options & PCRE2_UTF) != 0;
29948b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisunsigned int n8 = 0;
299553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (p[0] > 0)
299653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  n8 += add_to_class(classbits, uchardptr, options, cb, 0, p[0] - 1);
299753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhile (p[0] < NOTACHAR)
299853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
299953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  while (p[1] == p[0] + 1) p++;
300053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  n8 += add_to_class(classbits, uchardptr, options, cb, p[0] + 1,
300153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    (p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
300253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  p++;
300353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
300453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn n8;
300553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis}
3006f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3007f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3008f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
300953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/*************************************************
301053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*       Process (*VERB) name for escapes         *
301153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*************************************************/
3012f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
301353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called when the PCRE2_ALT_VERBNAMES option is set, to
301453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisprocess the characters in a verb's name argument. It is called twice, once with
301553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscodeptr == NULL, to find out the length of the processed name, and again to put
301653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe name into memory.
3017f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
301853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisArguments:
301953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ptrptr        pointer to the input pointer
302053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  codeptr       pointer to the compiled code pointer
302153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  errorcodeptr  pointer to the error code
302253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  options       the options bits
302353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  utf           TRUE if processing UTF
302453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb            compile data block
302553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
302653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:        length of the processed name, or < 0 on error
302753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/
3028f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
302953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic int
303053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisprocess_verb_name(PCRE2_SPTR *ptrptr, PCRE2_UCHAR **codeptr, int *errorcodeptr,
303153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  uint32_t options, BOOL utf, compile_block *cb)
303253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{
303353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint32_t arglen = 0;
303453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL inescq = FALSE;
303553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR ptr = *ptrptr;
303653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *code = (codeptr == NULL)? NULL : *codeptr;
3037f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
303853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (; ptr < cb->end_pattern; ptr++)
303953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
304053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  uint32_t x = *ptr;
3041f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
304253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Skip over literals */
3043f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
304453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (inescq)
304553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
304653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (x == CHAR_BACKSLASH && ptr[1] == CHAR_E)
304753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
304853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      inescq = FALSE;
304953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr++;;
305053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      continue;
3051f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
3052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
3053f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
305453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  else  /* Not a literal character */
3055f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
305653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (x == CHAR_RIGHT_PARENTHESIS) break;
3057f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
305853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Skip over comments and whitespace in extended mode. */
3059f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
306053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if ((options & PCRE2_EXTENDED) != 0)
306153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
306253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_SPTR wscptr = ptr;
306353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      while (MAX_255(x) && (cb->ctypes[x] & ctype_space) != 0) x = *(++ptr);
306453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (x == CHAR_NUMBER_SIGN)
306553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
306653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr++;
306753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        while (*ptr != CHAR_NULL || ptr < cb->end_pattern)
306853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
306953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (IS_NEWLINE(ptr))       /* For non-fixed-length newline cases, */
307053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {                        /* IS_NEWLINE sets cb->nllen. */
307153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            ptr += cb->nllen;
307253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
307353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
307453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr++;
307553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
307653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (utf) FORWARDCHAR(ptr);
3077f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
307853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
307953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3080f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
308153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* If we have skipped any characters, restart the loop. */
3082f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
308353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (ptr > wscptr)
308453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
308553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr--;
308653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        continue;
308753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
308853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
3089f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
309053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Process escapes */
3091f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
309253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (x == '\\')
309353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
309453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      int rc;
309553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *errorcodeptr = 0;
309653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      rc = PRIV(check_escape)(&ptr, cb->end_pattern, &x, errorcodeptr, options,
309753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        FALSE, cb);
309853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *ptrptr = ptr;   /* For possible error */
309953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (*errorcodeptr != 0) return -1;
310053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (rc != 0)
310153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
310253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (rc == ESC_Q)
310353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
310453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          inescq = TRUE;
310553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          continue;
310653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
310753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (rc == ESC_E) continue;
310853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        *errorcodeptr = ERR40;
310953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        return -1;
311053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
311153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
311253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
3113f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
311453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* We have the next character in the name. */
3115f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
311653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
311753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (utf)
311853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
311953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (code == NULL)   /* Just want the length */
312053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
312153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 8
312253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      int i;
312353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      for (i = 0; i < PRIV(utf8_table1_size); i++)
312453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if ((int)x <= PRIV(utf8_table1)[i]) break;
312553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      arglen += i;
312653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#elif PCRE2_CODE_UNIT_WIDTH == 16
312753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (x > 0xffff) arglen++;
312853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
312953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
313053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else
313153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
313253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_UCHAR cbuff[8];
313353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      x = PRIV(ord2utf)(x, cbuff);
313453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      memcpy(code, cbuff, CU2BYTES(x));
313553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      code += x;
313653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
313753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
313853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  else
313953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif  /* SUPPORT_UNICODE */
3140f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
314153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Not UTF */
314253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
31438b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    if (code != NULL) *code++ = (PCRE2_UCHAR)x;
314453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
3145f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
314653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  arglen++;
314753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
314853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if ((unsigned int)arglen > MAX_MARK)
3149f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
315053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *errorcodeptr = ERR76;
315153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *ptrptr = ptr;
315253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    return -1;
3153f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
3154f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
3155f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
315653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Update the pointers before returning. */
315753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
315853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*ptrptr = ptr;
315953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (codeptr != NULL) *codeptr = code;
316053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn arglen;
316153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis}
3162f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3163f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3164f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3165f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
316653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*          Macro for the next two functions      *
3167f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
3168f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
316953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Both scan_for_captures() and compile_branch() use this macro to generate a
317053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfragment of code that reads the characters of a name and sets its length
317153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis(checking for not being too long). Count the characters dynamically, to avoid
317253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe possibility of integer overflow. The same macro is used for reading *VERB
317353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnames. */
317453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
/* READ_NAME(ctype, errcode, errset)

Expands, in the caller's scope, to a statement sequence that sets namelen to
zero and then advances ptr over consecutive code units that pass MAX_255()
(presumably "value is below 256", which keeps the ctypes[] index in range) and
whose cb->ctypes[] entry has at least one bit of ctype set. Each accepted code
unit increments namelen, and the length is tested after every increment, so
namelen never grows beyond MAX_NAME_SIZE + 1. When the limit is exceeded,
errcode is stored in errset and control jumps to the caller's FAILED label.

Names that must be in scope where the macro is used: ptr, namelen, cb, and
the label FAILED.

Arguments:
  ctype    ctype bit mask to test; it is parenthesized in the expansion so
             that a compound mask such as (a | b) is applied as a whole
  errcode  value assigned to errset when the name is too long
  errset   lvalue that receives errcode

The expansion is intentionally a plain statement sequence rather than a
do/while(0) wrapper, so that existing uses keep compiling unchanged. It must
therefore not be used as the unbraced body of an if, else or loop. */

#define READ_NAME(ctype, errcode, errset)                      \
  namelen = 0;                                                 \
  while (MAX_255(*ptr) && (cb->ctypes[*ptr] & (ctype)) != 0)   \
    {                                                          \
    ptr++;                                                     \
    namelen++;                                                 \
    if (namelen > MAX_NAME_SIZE)                               \
      {                                                        \
      (errset) = (errcode);                                    \
      goto FAILED;                                             \
      }                                                        \
    }
3187f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3188f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3189f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3190f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
319153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*      Scan regex to identify named groups       *
3192f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
3193f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
319453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function is called first of all, to scan for named capturing groups so
319553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthat information about them is fully available to both the compiling scans.
319653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisIt skips over everything except parenthesized items.
3197f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3198f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments:
319953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ptrptr   points to pointer to the start of the pattern
320053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  options  compiling dynamic options
320153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb       pointer to the compile data block
3202f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
320353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:   zero on success or a non-zero error code, with pointer updated
3204f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
3205f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
/* One entry of the nest stack that scan_for_captures() builds in the compile
workspace. A new entry is started when a (?| group or a (?imsxJU: option group
is encountered. */

struct nest_save {
  uint16_t  nest_depth;    /* Parenthesis depth at which the entry was made */
  uint16_t  reset_group;   /* cb->bracount at a (?| group, otherwise zero */
  uint16_t  max_group;     /* Initialized to the same value as reset_group */
  uint16_t  flags;         /* Combination of NSF_xxx bits */
};

typedef struct nest_save nest_save;
321253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
/* Bit values for the flags field of a nest_save entry. */

#define NSF_RESET    (1u << 0)   /* Entry was created for a (?| group */
#define NSF_EXTENDED (1u << 1)   /* PCRE2_EXTENDED was set at creation */
#define NSF_DUPNAMES (1u << 2)   /* PCRE2_DUPNAMES was set at creation */
3216f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
32178b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisstatic int scan_for_captures(PCRE2_SPTR *ptrptr, uint32_t options,
321853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  compile_block *cb)
321953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis{
322053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t c;
322153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t delimiter;
322253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t set, unset, *optset;
32238b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisuint32_t skiptoket = 0;
32248b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisuint16_t nest_depth = 0;
322553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint errorcode = 0;
322653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint escape;
322753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint namelen;
322853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint i;
322953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL inescq = FALSE;
323053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL isdupname;
323153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL utf = (options & PCRE2_UTF) != 0;
323253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL negate_class;
323353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR name;
323453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR start;
323553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR ptr = *ptrptr;
323653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnamed_group *ng;
323753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnest_save *top_nest = NULL;
323853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnest_save *end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size);
323953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
324053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* The size of the nest_save structure might not be a factor of the size of the
324153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisworkspace. Therefore we must round down end_nests so as to correctly avoid
324253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscreating a nest_save that spans the end of the workspace. */
324353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
324453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisend_nests = (nest_save *)((char *)end_nests -
324553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ((cb->workspace_size * sizeof(PCRE2_UCHAR)) % sizeof(nest_save)));
324653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
324753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Now scan the pattern */
324853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
324953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (; ptr < cb->end_pattern; ptr++)
3250f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
325153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  c = *ptr;
325253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
325353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Parenthesized groups set skiptoket when all following characters up to the
325453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  next closing parenthesis must be ignored. The parenthesis itself must be
325553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  processed (to end the nested parenthesized item). */
325653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
32578b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis  if (skiptoket != 0)
3258f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
325953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (c != CHAR_RIGHT_PARENTHESIS) continue;
32608b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    skiptoket = 0;
3261f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
3262f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
326353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Skip over literals */
3264f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
326553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (inescq)
3266f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
326753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
326853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
326953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      inescq = FALSE;
327053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr++;
327153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
327253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    continue;
3273f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
3274f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
32758b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis  /* Skip over # comments and whitespace in extended mode. */
327653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
327753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if ((options & PCRE2_EXTENDED) != 0)
327853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
32798b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    PCRE2_SPTR wscptr = ptr;
32808b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    while (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) c = *(++ptr);
32818b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    if (c == CHAR_NUMBER_SIGN)
328253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
328353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr++;
32848b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      while (ptr < cb->end_pattern)
328553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
328653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (IS_NEWLINE(ptr))         /* For non-fixed-length newline cases, */
328753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {                          /* IS_NEWLINE sets cb->nllen. */
328853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr += cb->nllen;
328953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          break;
329053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
329153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr++;
329253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
329353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (utf) FORWARDCHAR(ptr);
329453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
329553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
32968b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      }
32978b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
32988b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    /* If we skipped any characters, restart the loop. Otherwise, we didn't see
32998b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    a comment. */
33008b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
33018b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    if (ptr > wscptr)
33028b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      {
33038b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      ptr--;
33048b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      continue;
330553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
330653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
33070ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
330853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Process the next pattern item. */
33090ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
331053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  switch(c)
331153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
331253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    default:              /* Most characters are just skipped */
331353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
3314f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
331553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Skip escapes except for \Q */
3316f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
331753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_BACKSLASH:
331853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    errorcode = 0;
331953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    escape = PRIV(check_escape)(&ptr, cb->end_pattern, &c, &errorcode, options,
332053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      FALSE, cb);
332153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (errorcode != 0) goto FAILED;
332253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (escape == ESC_Q) inescq = TRUE;
332353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
3324f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
332553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Skip a character class. The syntax is complicated so we have to
332653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    replicate some of what happens when a class is processed for real. */
3327f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
332853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_LEFT_SQUARE_BRACKET:
332953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0 ||
333053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0)
333153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
333253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr += 6;
333353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
333453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
3335f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
333653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* If the first character is '^', set the negation flag (not actually used
333753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    here, except to recognize only one ^) and skip it. If the first few
333853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    characters (either before or after ^) are \Q\E or \E we skip them too. This
333953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    makes for compatibility with Perl. */
3340f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
334153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    negate_class = FALSE;
334253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    for (;;)
334353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
334453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c = *(++ptr);   /* First character in class */
334553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (c == CHAR_BACKSLASH)
334653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
334753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (ptr[1] == CHAR_E)
334853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr++;
334953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else if (PRIV(strncmp_c8)(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0)
335053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr += 3;
335153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else
335253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          break;
335353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
335453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
335553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        negate_class = TRUE;
335653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else break;
335753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
3358f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
335953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (c == CHAR_RIGHT_SQUARE_BRACKET &&
336053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        (cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)
336153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
3362f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
336353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Loop for the contents of the class */
3364f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
336553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    for (;;)
336653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
336753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_SPTR tempptr;
3368f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
336953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (c == CHAR_NULL && ptr >= cb->end_pattern)
337053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
337153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        errorcode = ERR6;  /* Missing terminating ']' */
337253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto FAILED;
337353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3374f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
337553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
337653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (utf && HAS_EXTRALEN(c))
337753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {                           /* Braces are required because the */
337853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
337953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
338053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
3381f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
338253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Inside \Q...\E everything is literal except \E */
3383f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
338453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (inescq)
338553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
338653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)  /* If we are at \E */
338753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
338853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          inescq = FALSE;                   /* Reset literal state */
338953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr++;                            /* Skip the 'E' */
339053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
339153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto CONTINUE_CLASS;
339253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3393f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
339453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Skip POSIX class names. */
339553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (c == CHAR_LEFT_SQUARE_BRACKET &&
339653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
339753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis           ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
339853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
339953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr = tempptr + 1;
340053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
340153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else if (c == CHAR_BACKSLASH)
340253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
340353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        errorcode = 0;
340453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        escape = PRIV(check_escape)(&ptr, cb->end_pattern, &c, &errorcode,
340553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          options, TRUE, cb);
340653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (errorcode != 0) goto FAILED;
340753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (escape == ESC_Q) inescq = TRUE;
340853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
340953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
341053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      CONTINUE_CLASS:
341153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c = *(++ptr);
341253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break;
341353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }     /* End of class-processing loop */
341453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
3415f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
341653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* This is the real work of this function - handling parentheses. */
3417f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
341853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_LEFT_PARENTHESIS:
341953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    nest_depth++;
3420f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
342153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (ptr[1] != CHAR_QUESTION_MARK)
342253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
342353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (ptr[1] != CHAR_ASTERISK)
342453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
342553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if ((options & PCRE2_NO_AUTO_CAPTURE) == 0) cb->bracount++;
342653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3427f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
34288b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      /* (*something) - skip over a name, and then just skip to closing ket
34298b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      unless PCRE2_ALT_VERBNAMES is set, in which case we have to process
34308b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      escapes in the string after a verb name terminated by a colon. */
3431f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
343253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else
343353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
343453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr += 2;
343553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) != 0) ptr++;
34368b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis        if (*ptr == CHAR_COLON && (options & PCRE2_ALT_VERBNAMES) != 0)
343753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
343853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr++;
34398b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis          if (process_verb_name(&ptr, NULL, &errorcode, options, utf, cb) < 0)
34408b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis            goto FAILED;
34418b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis          }
34428b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis        else
34438b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis          {
34448b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis          while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS)
34458b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis            ptr++;
344653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
344753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        nest_depth--;
344853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
344953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
3450f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
345153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Handle (?...) groups */
3452f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
345353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else switch(ptr[2])
345453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
345553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      default:
345653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr += 2;
345753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (ptr[0] == CHAR_R ||                           /* (?R) */
345853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr[0] == CHAR_NUMBER_SIGN ||                 /* (?#) */
345953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          IS_DIGIT(ptr[0]) ||                           /* (?n) */
346053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          (ptr[0] == CHAR_MINUS && IS_DIGIT(ptr[1])))   /* (?-n) */
346153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
34628b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis        skiptoket = ptr[0];
346353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
346453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3465f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
346653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Handle (?| and (?imsxJU: which are the only other valid forms. Both
346753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      need a new block on the nest stack. */
3468f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
346953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (top_nest == NULL) top_nest = (nest_save *)(cb->start_workspace);
347053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else if (++top_nest >= end_nests)
347153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
347253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        errorcode = ERR84;
347353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto FAILED;
347453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
347553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      top_nest->nest_depth = nest_depth;
347653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      top_nest->flags = 0;
347753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if ((options & PCRE2_EXTENDED) != 0) top_nest->flags |= NSF_EXTENDED;
347853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if ((options & PCRE2_DUPNAMES) != 0) top_nest->flags |= NSF_DUPNAMES;
3479f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
348053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (*ptr == CHAR_VERTICAL_LINE)
348153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
34828b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis        top_nest->reset_group = (uint16_t)cb->bracount;
34838b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis        top_nest->max_group = (uint16_t)cb->bracount;
348453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        top_nest->flags |= NSF_RESET;
348553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cb->external_flags |= PCRE2_DUPCAPUSED;
348653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
348753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3488f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
348953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Scan options */
3490f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
349153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      top_nest->reset_group = 0;
349253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      top_nest->max_group = 0;
3493f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
349453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      set = unset = 0;
349553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      optset = &set;
3496f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
349753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Need only track (?x: and (?J: at this stage */
3498f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
349953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
350053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
350153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        switch (*ptr++)
350253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
350353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          case CHAR_MINUS: optset = &unset; break;
3504f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
350553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          case CHAR_x: *optset |= PCRE2_EXTENDED; break;
3506f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
350753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          case CHAR_J:
350853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          *optset |= PCRE2_DUPNAMES;
350953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->external_flags |= PCRE2_JCHANGED;
351053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          break;
3511f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
351253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          case CHAR_i:
351353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          case CHAR_m:
351453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          case CHAR_s:
351553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          case CHAR_U:
351653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          break;
3517f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
35188b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis          default:
35198b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis          errorcode = ERR11;
35208b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis          ptr--;    /* Correct the offset */
35218b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis          goto FAILED;
352253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
352353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3524f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
352553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      options = (options | set) & (~unset);
3526f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
352753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* If the options ended with ')' this is not the start of a nested
352853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      group with option changes, so the options change at this level. If the
352953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      previous level set up a nest block, discard the one we have just created.
353053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      Otherwise adjust it for the previous level. */
3531f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
353253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (*ptr == CHAR_RIGHT_PARENTHESIS)
353353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
353453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        nest_depth--;
353553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (top_nest > (nest_save *)(cb->start_workspace) &&
353653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            (top_nest-1)->nest_depth == nest_depth) top_nest --;
353753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else top_nest->nest_depth = nest_depth;
353853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
353953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
3540f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
354153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Skip over a numerical or string argument for a callout. */
3542f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
354353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case CHAR_C:
354453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr += 2;
354553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (ptr[1] == CHAR_RIGHT_PARENTHESIS) break;
354653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (IS_DIGIT(ptr[1]))
354753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
354853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        while (IS_DIGIT(ptr[1])) ptr++;
354953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3550f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
355153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Handle a string argument */
3552f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
355353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else
355453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
355553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr++;
355653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        delimiter = 0;
355753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
355853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
355953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (*ptr == PRIV(callout_start_delims)[i])
356053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
356153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            delimiter = PRIV(callout_end_delims)[i];
356253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
356353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
356453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
3565f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
356653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (delimiter == 0)
356753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
356853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          errorcode = ERR82;
356953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          goto FAILED;
357053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
3571f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
357253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        start = ptr;
357353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        do
357453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
357553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (++ptr >= cb->end_pattern)
357653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
357753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            errorcode = ERR81;
357853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            ptr = start;   /* To give a more useful message */
357953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            goto FAILED;
358053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
358153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2;
358253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
358353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        while (ptr[0] != delimiter);
358453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3585f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
358653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Check terminating ) */
3587f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
358853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (ptr[1] != CHAR_RIGHT_PARENTHESIS)
35898366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes        {
359053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        errorcode = ERR39;
359153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr++;
359253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto FAILED;
35938366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes        }
359453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
3595f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
359653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Conditional group */
3597f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
359853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case CHAR_LEFT_PARENTHESIS:
359953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (ptr[3] != CHAR_QUESTION_MARK)   /* Not assertion or callout */
360053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
360153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        nest_depth++;
360253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr += 2;
360353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
360453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3605f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
360653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Must be an assertion or a callout */
3607f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
360853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      switch(ptr[4])
360953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       {
361053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       case CHAR_LESS_THAN_SIGN:
361153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       if (ptr[5] != CHAR_EXCLAMATION_MARK && ptr[5] != CHAR_EQUALS_SIGN)
361253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis         goto MISSING_ASSERTION;
361353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       /* Fall through */
361453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
361553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       case CHAR_C:
361653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       case CHAR_EXCLAMATION_MARK:
361753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       case CHAR_EQUALS_SIGN:
361853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       ptr++;
361953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       break;
3620f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
362153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       default:
362253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       MISSING_ASSERTION:
362353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       ptr += 3;            /* To improve error message */
362453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       errorcode = ERR28;
362553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       goto FAILED;
362653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       }
362753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
3628f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
362953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case CHAR_COLON:
363053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case CHAR_GREATER_THAN_SIGN:
363153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case CHAR_EQUALS_SIGN:
363253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case CHAR_EXCLAMATION_MARK:
363353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case CHAR_AMPERSAND:
363453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case CHAR_PLUS:
363553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr += 2;
363653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
3637f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
363853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case CHAR_P:
363953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (ptr[3] != CHAR_LESS_THAN_SIGN)
364053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
364153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr += 3;
364253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
364353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
364453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr++;
364553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c = CHAR_GREATER_THAN_SIGN;   /* Terminator */
364653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      goto DEFINE_NAME;
3647f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
364853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case CHAR_LESS_THAN_SIGN:
364953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (ptr[3] == CHAR_EQUALS_SIGN || ptr[3] == CHAR_EXCLAMATION_MARK)
365053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
365153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr += 3;
365253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
365353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
365453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c = CHAR_GREATER_THAN_SIGN;   /* Terminator */
365553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      goto DEFINE_NAME;
3656f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
365753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      case CHAR_APOSTROPHE:
365853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c = CHAR_APOSTROPHE;    /* Terminator */
3659f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
366053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      DEFINE_NAME:
366153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      name = ptr = ptr + 3;
3662f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
366353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (*ptr == c)          /* Empty name */
366453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
366553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        errorcode = ERR62;
366653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto FAILED;
366753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3668f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
366953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (IS_DIGIT(*ptr))
367053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
367153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        errorcode = ERR44;   /* Group name must start with non-digit */
367253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto FAILED;
367353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3674f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
367553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_word) == 0)
367653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
367753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        errorcode = ERR24;
367853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto FAILED;
367953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3680f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
368153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Advance ptr, set namelen and check its length. */
368253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      READ_NAME(ctype_word, ERR48, errorcode);
368353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
368453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (*ptr != c)
368553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
368653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        errorcode = ERR42;
368753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto FAILED;
368853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3689f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
369053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (cb->names_found >= MAX_NAME_COUNT)
369153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
369253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        errorcode = ERR49;
369353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto FAILED;
369453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3695f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
369653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (namelen + IMM2_SIZE + 1 > cb->name_entry_size)
36978b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis        cb->name_entry_size = (uint16_t)(namelen + IMM2_SIZE + 1);
3698f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
369953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* We have a valid name for this capturing group. */
3700f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
370153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cb->bracount++;
3702f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
370353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Scan the list to check for duplicates. For duplicate names, if the
370453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      number is the same, break the loop, which causes the name to be
370553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      discarded; otherwise, if DUPNAMES is not set, give an error.
370653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      If it is set, allow the name with a different number, but continue
370753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      scanning in case this is a duplicate with the same number. For
370853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      non-duplicate names, give an error if the number is duplicated. */
3709f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
371053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      isdupname = FALSE;
371153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ng = cb->named_groups;
371253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      for (i = 0; i < cb->names_found; i++, ng++)
371353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
371453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (namelen == ng->length &&
37158b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis            PRIV(strncmp)(name, ng->name, (size_t)namelen) == 0)
371653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
371753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (ng->number == cb->bracount) break;
371853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if ((options & PCRE2_DUPNAMES) == 0)
371953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
372053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            errorcode = ERR43;
372153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            goto FAILED;
372253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
372353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          isdupname = ng->isdup = TRUE;     /* Mark as a duplicate */
372453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->dupnames = TRUE;              /* Duplicate names exist */
372553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
372653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else if (ng->number == cb->bracount)
372753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
372853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          errorcode = ERR65;
372953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          goto FAILED;
373053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
373153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3732f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
373353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (i < cb->names_found) break;   /* Ignore duplicate with same number */
3734f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
373553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Increase the list size if necessary */
3736f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
373753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (cb->names_found >= cb->named_group_list_size)
373853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
37398b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis        uint32_t newsize = cb->named_group_list_size * 2;
374053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        named_group *newspace =
374153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->cx->memctl.malloc(newsize * sizeof(named_group),
374253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->cx->memctl.memory_data);
374353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (newspace == NULL)
374453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
374553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          errorcode = ERR21;
374653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          goto FAILED;
374753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
3748f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
374953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        memcpy(newspace, cb->named_groups,
375053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->named_group_list_size * sizeof(named_group));
375153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (cb->named_group_list_size > NAMED_GROUP_LIST_SIZE)
375253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->cx->memctl.free((void *)cb->named_groups,
375353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->cx->memctl.memory_data);
375453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cb->named_groups = newspace;
375553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cb->named_group_list_size = newsize;
375653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
3757f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
375853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Add this name to the list */
3759f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
376053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cb->named_groups[cb->names_found].name = name;
37618b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      cb->named_groups[cb->names_found].length = (uint16_t)namelen;
376253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cb->named_groups[cb->names_found].number = cb->bracount;
37638b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      cb->named_groups[cb->names_found].isdup = (uint16_t)isdupname;
376453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cb->names_found++;
376553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;
376653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }        /* End of (? switch */
376753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;     /* End of ( handling */
3768f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
376953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* At an alternation, reset the capture count if we are in a (?| group. */
3770f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
377153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_VERTICAL_LINE:
377253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (top_nest != NULL && top_nest->nest_depth == nest_depth &&
377353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        (top_nest->flags & NSF_RESET) != 0)
377453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
377553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (cb->bracount > top_nest->max_group)
37768b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis        top_nest->max_group = (uint16_t)cb->bracount;
377753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cb->bracount = top_nest->reset_group;
377853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
377953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
3780f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
378153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* At a right parenthesis, reset the capture count to the maximum if we
378253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    are in a (?| group and/or reset the extended option. */
378353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
378453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_RIGHT_PARENTHESIS:
378553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (top_nest != NULL && top_nest->nest_depth == nest_depth)
378653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
378753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if ((top_nest->flags & NSF_RESET) != 0 &&
378853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          top_nest->max_group > cb->bracount)
378953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cb->bracount = top_nest->max_group;
379053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if ((top_nest->flags & NSF_EXTENDED) != 0) options |= PCRE2_EXTENDED;
379153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else options &= ~PCRE2_EXTENDED;
379253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if ((top_nest->flags & NSF_DUPNAMES) != 0) options |= PCRE2_DUPNAMES;
379353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else options &= ~PCRE2_DUPNAMES;
379453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL;
379553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else top_nest--;
379653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
37978b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    if (nest_depth == 0)    /* Unmatched closing parenthesis */
37988b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      {
37998b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      errorcode = ERR22;
38008b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      goto FAILED;
38018b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      }
38028b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    nest_depth--;
380353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    break;
380453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
3805f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
380653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
38078b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisif (nest_depth == 0)
38088b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis  {
38098b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis  cb->final_bracount = cb->bracount;
38108b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis  return 0;
38118b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis  }
38128b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
38138b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis/* We give a special error for a missing closing parenthesis after (?# because
38148b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisit might otherwise be hard to see where the missing character is. */
38158b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
38168b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskiserrorcode = (skiptoket == CHAR_NUMBER_SIGN)? ERR18 : ERR14;
381753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
381853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisFAILED:
381953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*ptrptr = ptr;
382053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn errorcode;
3821f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
3822f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3823f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3824f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3825f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
3826f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*           Compile one branch                   *
3827f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
3828f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3829f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Scan the pattern, compiling it into a vector. If the options are
3830f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichchanged during the branch, the pointer is used to change the external options
3831f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbits. This function is used during the pre-compile phase when we are trying
3832f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichto find out the amount of memory needed, as well as during the real compile
3833f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichphase. The value of lengthptr distinguishes the two phases.
3834f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3835f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments:
3836f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  optionsptr        pointer to the option bits
3837f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  codeptr           points to the pointer to the current code point
3838f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  ptrptr            points to the current pattern pointer
3839f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  errorcodeptr      points to error code variable
384053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  firstcuptr        place to put the first required code unit
384153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  firstcuflagsptr   place to put the first code unit flags, or a negative number
384253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  reqcuptr          place to put the last required code unit
384353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  reqcuflagsptr     place to put the last required code unit flags, or a negative number
3844f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  bcptr             points to current branch chain
3845f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cond_depth        conditional nesting depth
384653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb                contains pointers to tables etc.
3847f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  lengthptr         NULL during the real compile phase
3848f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                    points to length accumulator during pre-compile phase
3849f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3850f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichReturns:            TRUE on success
3851f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                    FALSE, with *errorcodeptr set non-zero on error
3852f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
3853f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3854f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic BOOL
385553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr,
385653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  PCRE2_SPTR *ptrptr, int *errorcodeptr,
385753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  uint32_t *firstcuptr, int32_t *firstcuflagsptr,
385853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  uint32_t *reqcuptr, int32_t *reqcuflagsptr,
3859f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  branch_chain *bcptr, int cond_depth,
386053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  compile_block *cb, size_t *lengthptr)
3861f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
3862f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichint repeat_min = 0, repeat_max = 0;      /* To please picky compilers */
3863f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichint bravalue = 0;
386453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t greedy_default, greedy_non_default;
386553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t repeat_type, op_type;
386653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t options = *optionsptr;               /* May change dynamically */
386753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t firstcu, reqcu;
386853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint32_t firstcuflags, reqcuflags;
386953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t zeroreqcu, zerofirstcu;
387053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint32_t zeroreqcuflags, zerofirstcuflags;
387153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint32_t req_caseopt, reqvary, tempreqvary;
3872f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichint after_manual_callout = 0;
3873f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichint escape;
387453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissize_t length_prevgroup = 0;
387553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister uint32_t c;
387653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister PCRE2_UCHAR *code = *codeptr;
387753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *last_code = code;
387853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *orig_code = code;
387953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *tempcode;
3880f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichBOOL inescq = FALSE;
388153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL groupsetfirstcu = FALSE;
388253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR ptr = *ptrptr;
388353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR tempptr;
388453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *previous = NULL;
388553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *previous_callout = NULL;
388653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint8_t classbits[32];
388753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
388853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* We can fish out the UTF setting once and for all into a BOOL, but we must
388953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnot do this for other options (e.g. PCRE2_EXTENDED) because they may change
3890f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdynamically as we process the pattern. */
3891f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
389253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
389353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL utf = (options & PCRE2_UTF) != 0;
389453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH != 32
389553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR utf_units[6];      /* For setting up multi-cu chars */
3896f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
389753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
389853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else  /* No UTF support */
3899f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichBOOL utf = FALSE;
3900f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
3901f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3902f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Helper variables for OP_XCLASS opcode (for characters > 255). We define
3903f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichclass_uchardata always so that it can be passed to add_to_class() always,
3904f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichthough it will not be used in non-UTF 8-bit cases. This avoids having to supply
3905f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichalternative calls for the different cases. */
3906f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
390753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *class_uchardata;
390853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS
3909f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichBOOL xclass;
391053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *class_uchardata_base;
3911f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
3912f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3913f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Set up the default and non-default settings for greediness */
3914f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
391553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisgreedy_default = ((options & PCRE2_UNGREEDY) != 0);
3916f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichgreedy_non_default = greedy_default ^ 1;
3917f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
391853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Initialize no first unit, no required unit. REQ_UNSET means "no char
3919f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichmatching encountered yet". It gets changed to REQ_NONE if we hit something that
392053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismatches a non-fixed first unit; reqcu just remains unset if we never find one.
3921f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3922f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichWhen we hit a repeat whose minimum is zero, we may have to adjust these values
3923f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichto take the zero repeat into account. This is implemented by setting them to
392453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiszerofirstcu and zeroreqcu when such a repeat is encountered. The individual
3925f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichitem types that can be repeated set these backoff variables appropriately. */
3926f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
392753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfirstcu = reqcu = zerofirstcu = zeroreqcu = 0;
392853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfirstcuflags = reqcuflags = zerofirstcuflags = zeroreqcuflags = REQ_UNSET;
3929f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
393053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* The variable req_caseopt contains either the REQ_CASELESS value or zero,
393153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisaccording to the current setting of the caseless flag. The REQ_CASELESS value
393253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisleaves the lower 28 bits empty. It is added into the firstcu or reqcu variables
393353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisto record the case status of the value. This is used only for ASCII characters.
393453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/
3935f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
393653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreq_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
3937f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3938f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Switch on next character until the end of the branch */
3939f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3940f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichfor (;; ptr++)
3941f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
3942f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  BOOL negate_class;
3943f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  BOOL should_flip_negation;
394453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  BOOL match_all_or_no_wide_chars;
3945f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  BOOL possessive_quantifier;
3946f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  BOOL is_quantifier;
3947f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  BOOL is_recurse;
394853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  BOOL is_dupname;
3949f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  BOOL reset_bracount;
3950f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  int class_has_8bitchar;
3951f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  int class_one_char;
395253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS
3953f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  BOOL xclass_has_prop;
3954f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
395553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int recno;                               /* Must be signed */
395653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int refsign;                             /* Must be signed */
395753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int terminator;                          /* Must be signed */
3958f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  unsigned int mclength;
3959f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  unsigned int tempbracount;
396053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  uint32_t ec;
396153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  uint32_t newoptions;
396253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  uint32_t skipunits;
396353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  uint32_t subreqcu, subfirstcu;
396453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int32_t subreqcuflags, subfirstcuflags;  /* Must be signed */
396553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  PCRE2_UCHAR mcbuffer[8];
3966f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
39678b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis  /* Come here to restart the loop. */
39688b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
39698b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis  REDO_LOOP:
39708b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
3971f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* Get next character in the pattern */
3972f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3973f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  c = *ptr;
3974f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3975f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* If we are at the end of a nested substitution, revert to the outer level
397653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  string. Nesting only happens one or two levels deep, and the inserted string
397753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  is always zero terminated. */
3978f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
397953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (c == CHAR_NULL && cb->nestptr[0] != NULL)
3980f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
398153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    ptr = cb->nestptr[0];
398253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    cb->nestptr[0] = cb->nestptr[1];
398353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    cb->nestptr[1] = NULL;
3984f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    c = *ptr;
3985f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
3986f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3987f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* If we are in the pre-compile phase, accumulate the length used for the
3988f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  previous cycle of this loop. */
3989f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
3990f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  if (lengthptr != NULL)
3991f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
399253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (code > cb->start_workspace + cb->workspace_size -
3993f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        WORK_SIZE_SAFETY_MARGIN)                       /* Check for overrun */
3994f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
399553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *errorcodeptr = (code >= cb->start_workspace + cb->workspace_size)?
399653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ERR52 : ERR86;
3997f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      goto FAILED;
3998f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
3999f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4000f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* There is at least one situation where code goes backwards: this is the
4001f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case of a zero quantifier after a class (e.g. [ab]{0}). At compile time,
4002f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    the class is simply eliminated. However, it is created first, so we have to
4003f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    allow memory for it. Therefore, don't ever reduce the length at this point.
4004f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    */
4005f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4006f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (code < last_code) code = last_code;
4007f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4008f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Paranoid check for integer overflow */
4009f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
401053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (OFLOW_MAX - *lengthptr < (size_t)(code - last_code))
4011f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
4012f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *errorcodeptr = ERR20;
4013f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      goto FAILED;
4014f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
40158b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    *lengthptr += (size_t)(code - last_code);
4016f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4017f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* If "previous" is set and it is not at the start of the work space, move
4018f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    it back to there, in order to avoid filling up the work space. Otherwise,
4019f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if "previous" is NULL, reset the current code pointer to the start. */
4020f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4021f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (previous != NULL)
4022f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
4023f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (previous > orig_code)
4024f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
40258b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis        memmove(orig_code, previous, (size_t)CU2BYTES(code - previous));
4026f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        code -= previous - orig_code;
4027f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        previous = orig_code;
4028f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
4029f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
4030f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    else code = orig_code;
4031f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4032f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Remember where this code item starts so we can pick up the length
4033f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    next time round. */
4034f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    last_code = code;
4036f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
4037f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
403853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Before doing anything else we must handle all the special items that do
403953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  nothing, and which may come between an item and its quantifier. Otherwise,
404053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  when auto-callouts are enabled, a callout gets incorrectly inserted before
404153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  the quantifier is recognized. After recognizing a "do nothing" item, restart
404253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  the loop in case another one follows. */
4043f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
404453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* If c is not NULL we are not at the end of the pattern. If it is NULL, we
404553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  may still be in the pattern with a NULL data item. In these cases, if we are
404653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  in \Q...\E, check for the \E that ends the literal string; if not, we have a
404753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  literal character. If not in \Q...\E, an isolated \E is ignored. */
4048f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
404953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (c != CHAR_NULL || ptr < cb->end_pattern)
4050f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
4051f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
4052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
4053f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      inescq = FALSE;
4054f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      ptr++;
4055f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      continue;
4056f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
405753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else if (inescq)   /* Literal character */
4058f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
4059f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (previous_callout != NULL)
4060f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
4061f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (lengthptr == NULL)  /* Don't attempt in pre-compile phase */
406253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          complete_callout(previous_callout, ptr, cb);
4063f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        previous_callout = NULL;
4064f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
406553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if ((options & PCRE2_AUTO_CALLOUT) != 0)
4066f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
4067f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        previous_callout = code;
406853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        code = auto_callout(code, ptr, cb);
4069f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
4070f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      goto NORMAL_CHAR;
4071f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
407253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
407353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Check for the start of a \Q...\E sequence. We must do this here rather
407453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    than later in case it is immediately followed by \E, which turns it into a
407553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    "do nothing" sequence. */
407653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
407753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
407853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
407953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      inescq = TRUE;
408053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr++;
408153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      continue;
408253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
4083f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
4084f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
408553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* In extended mode, skip white space and #-comments that end at newline. */
4086f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
408753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if ((options & PCRE2_EXTENDED) != 0)
4088f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
408953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    PCRE2_SPTR wscptr = ptr;
409053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    while (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) c = *(++ptr);
409153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (c == CHAR_NUMBER_SIGN)
4092f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
4093f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      ptr++;
409453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      while (ptr < cb->end_pattern)
4095f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
4096f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (IS_NEWLINE(ptr))         /* For non-fixed-length newline cases, */
409753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {                          /* IS_NEWLINE sets cb->nllen. */
409853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr += cb->nllen;
4099f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          break;
4100f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
4101f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        ptr++;
410253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
4103f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (utf) FORWARDCHAR(ptr);
4104f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
4105f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
4106f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
4107f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
410853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* If we skipped any characters, restart the loop. Otherwise, we didn't see
410953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    a comment. */
4110f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
41118b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    if (ptr > wscptr) goto REDO_LOOP;
411253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
4113f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
411453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Skip over (?# comments. */
4115f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
411653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
411753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr[2] == CHAR_NUMBER_SIGN)
4118f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
411953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    ptr += 3;
412053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
412153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (*ptr != CHAR_RIGHT_PARENTHESIS)
412253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
412353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *errorcodeptr = ERR18;
412453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      goto FAILED;
412553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
412653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    continue;
4127f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
4128f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
412953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* End of processing "do nothing" items. See if the next thing is a
413053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  quantifier. */
413153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
413253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  is_quantifier =
413353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
413453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
413553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
413653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Fill in length of a previous callout and create an auto callout if
413753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  required, except when the next thing is a quantifier or when processing a
413853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  property substitution string for \w etc in UCP mode. */
4139f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
414053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (!is_quantifier && cb->nestptr[0] == NULL)
4141f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
414253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (previous_callout != NULL && after_manual_callout-- <= 0)
414353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
414453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (lengthptr == NULL)      /* Don't attempt in pre-compile phase */
414553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        complete_callout(previous_callout, ptr, cb);
414653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      previous_callout = NULL;
414753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
414853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
414953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if ((options & PCRE2_AUTO_CALLOUT) != 0)
415053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
415153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      previous_callout = code;
415253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      code = auto_callout(code, ptr, cb);
415353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
4154f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
4155f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4156f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* Process the next pattern item. */
4157f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4158f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  switch(c)
4159f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
4160f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* ===================================================================*/
416153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* The branch terminates at string end or | or ) */
416253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
416353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_NULL:
416453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (ptr < cb->end_pattern) goto NORMAL_CHAR;   /* Zero data character */
416553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Fall through */
416653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
416753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    case CHAR_VERTICAL_LINE:
4168f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case CHAR_RIGHT_PARENTHESIS:
416953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *firstcuptr = firstcu;
417053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *firstcuflagsptr = firstcuflags;
417153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *reqcuptr = reqcu;
417253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *reqcuflagsptr = reqcuflags;
4173f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    *codeptr = code;
4174f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    *ptrptr = ptr;
4175f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (lengthptr != NULL)
4176f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
417753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (OFLOW_MAX - *lengthptr < (size_t)(code - last_code))
4178f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
4179f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *errorcodeptr = ERR20;
4180f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        goto FAILED;
4181f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
41828b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      *lengthptr += (size_t)(code - last_code);  /* To include callout length */
4183f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
4184f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    return TRUE;
4185f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4186f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4187f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* ===================================================================*/
4188f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Handle single-character metacharacters. In multiline mode, ^ disables
4189f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    the setting of any following char as a first character. */
4190f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4191f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case CHAR_CIRCUMFLEX_ACCENT:
4192f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    previous = NULL;
419353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if ((options & PCRE2_MULTILINE) != 0)
4194f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
419553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (firstcuflags == REQ_UNSET)
419653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        zerofirstcuflags = firstcuflags = REQ_NONE;
4197f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *code++ = OP_CIRCM;
4198f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
4199f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    else *code++ = OP_CIRC;
4200f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
4201f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4202f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case CHAR_DOLLAR_SIGN:
4203f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    previous = NULL;
420453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *code++ = ((options & PCRE2_MULTILINE) != 0)? OP_DOLLM : OP_DOLL;
4205f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
4206f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4207f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* There can never be a first char if '.' is first, whatever happens about
420853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    repeats. The value of reqcu doesn't change either. */
4209f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4210f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case CHAR_DOT:
421153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
421253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zerofirstcu = firstcu;
421353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zerofirstcuflags = firstcuflags;
421453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zeroreqcu = reqcu;
421553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zeroreqcuflags = reqcuflags;
4216f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    previous = code;
421753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *code++ = ((options & PCRE2_DOTALL) != 0)? OP_ALLANY: OP_ANY;
4218f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
4219f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4220f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4221f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* ===================================================================*/
4222f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Character classes. If the included characters are all < 256, we build a
4223f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    32-byte bitmap of the permitted characters, except in the special case
4224f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    where there is only one such character. For negated classes, we build the
4225f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    map as usual, then invert it at the end. However, we use a different opcode
4226f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    so that data characters > 255 can be handled correctly.
4227f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4228f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    If the class contains characters outside the 0-255 range, a different
4229f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    opcode is compiled. It may optionally have a bit map for characters < 256,
4230f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    but those above are explicitly listed afterwards. A flag byte tells
4231f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    whether the bitmap is present, and whether this is a negated class or not.
4232f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
423353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    An isolated ']' character is not treated specially, so is just another data
423453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    character. In earlier versions of PCRE that used the original API there was
423553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    a "JavaScript compatibility mode" in which it gave an error. However,
423653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    JavaScript itself has changed in this respect so there is no longer any
423753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    need for this special handling.
4238f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
423953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    In another (POSIX) regex library, the ugly syntax [[:<:]] and [[:>:]] is
4240f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    used for "start of word" and "end of word". As these are otherwise illegal
4241f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    sequences, we don't break anything by recognizing them. They are replaced
424253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    by \b(?=\w) and \b(?<=\w) respectively. This can only happen at the top
424353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    nesting level, as no other inserted sequences will contain these oddities.
424453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    Sequences like [a[:<:]] are erroneous and are handled by the normal code
424553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    below. */
4246f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4247f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case CHAR_LEFT_SQUARE_BRACKET:
424853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0)
4249f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
425053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cb->nestptr[0] = ptr + 7;
42518b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      ptr = sub_start_of_word;
42528b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      goto REDO_LOOP;
4253f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
4254f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
425553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0)
4256f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
425753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cb->nestptr[0] = ptr + 7;
42588b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      ptr = sub_end_of_word;
42598b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis      goto REDO_LOOP;
4260f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
4261f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4262f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Handle a real character class. */
4263f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4264f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    previous = code;
4265f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4266f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
4267f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    they are encountered at the top level, so we'll do that too. */
4268f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4269f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
4270f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich         ptr[1] == CHAR_EQUALS_SIGN) &&
4271f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        check_posix_syntax(ptr, &tempptr))
4272f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
427353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR12 : ERR13;
4274f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      goto FAILED;
4275f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
4276f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4277f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* If the first character is '^', set the negation flag and skip it. Also,
4278f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if the first few characters (either before or after ^) are \Q\E or \E we
4279f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    skip them too. This makes for compatibility with Perl. */
4280f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4281f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    negate_class = FALSE;
4282f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    for (;;)
4283f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
4284f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      c = *(++ptr);
4285f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (c == CHAR_BACKSLASH)
4286f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
4287f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (ptr[1] == CHAR_E)
4288f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ptr++;
428953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else if (PRIV(strncmp_c8)(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0)
4290f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ptr += 3;
4291f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else
4292f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          break;
4293f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
4294f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
4295f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        negate_class = TRUE;
4296f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else break;
4297f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
4298f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
429953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Empty classes are allowed if PCRE2_ALLOW_EMPTY_CLASS is set. Otherwise,
4300f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    an initial ']' is taken as a data character -- the code below handles
430153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    that. When empty classes are allowed, [] must always fail, so generate
430253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    OP_FAIL, whereas [^] must match any character, so generate OP_ALLANY. */
4303f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4304f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (c == CHAR_RIGHT_SQUARE_BRACKET &&
430553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        (cb->external_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)
4306f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
4307f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *code++ = negate_class? OP_ALLANY : OP_FAIL;
430853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
430953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      zerofirstcu = firstcu;
431053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      zerofirstcuflags = firstcuflags;
4311f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      break;
4312f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
4313f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
431453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* If a non-extended class contains a negative special such as \S, we need
431553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    to flip the negation flag at the end, so that support for characters > 255
431653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    works correctly (they are all included in the class). An extended class may
431753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    need to insert specific matching or non-matching code for wide characters.
431853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    */
4319f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
432053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    should_flip_negation = match_all_or_no_wide_chars = FALSE;
4321f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4322f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Extended class (xclass) will be used when characters > 255
4323f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    might match. */
4324f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
432553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS
4326f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    xclass = FALSE;
4327f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    class_uchardata = code + LINK_SIZE + 2;   /* For XCLASS items */
4328f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    class_uchardata_base = class_uchardata;   /* Save the start */
4329f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
4330f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4331f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* For optimization purposes, we track some properties of the class:
433253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    class_has_8bitchar will be non-zero if the class contains at least one
433353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    character with a code point less than 256; class_one_char will be 1 if the
433453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    class contains just one character; xclass_has_prop will be TRUE if Unicode
433553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    property checks are present in the class. */
4336f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4337f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    class_has_8bitchar = 0;
4338f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    class_one_char = 0;
433953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS
4340f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    xclass_has_prop = FALSE;
4341f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
4342f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
434353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map
434453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    in a temporary bit of memory, in case the class contains fewer than two
4345f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    8-bit characters because in that case the compiled code doesn't use the bit
4346f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    map. */
4347f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
434853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    memset(classbits, 0, 32 * sizeof(uint8_t));
4349f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
435053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Process characters until ] is reached. As the test is at the end of the
435153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    loop, an initial ] is taken as a data character. At the start of the loop,
435253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    c contains the first code unit of the character. If it is zero, check for
435353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    the end of the pattern, to allow binary zero as data. */
4354f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
435553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    for(;;)
4356f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
435753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_SPTR oldptr;
435853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef EBCDIC
435953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      BOOL range_is_literal = TRUE;
436053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
436153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
436253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (c == CHAR_NULL && ptr >= cb->end_pattern)
436353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
436453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        *errorcodeptr = ERR6;  /* Missing terminating ']' */
436553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto FAILED;
436653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
4367f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
436853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
4369f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (utf && HAS_EXTRALEN(c))
4370f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {                           /* Braces are required because the */
4371f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
4372f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
4373f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
4374f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4375f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* Inside \Q...\E everything is literal except \E */
4376f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4377f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (inescq)
4378f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
4379f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)  /* If we are at \E */
4380f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
4381f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          inescq = FALSE;                   /* Reset literal state */
4382f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ptr++;                            /* Skip the 'E' */
438353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          goto CONTINUE_CLASS;              /* Carry on with next char */
4384f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
4385f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        goto CHECK_RANGE;                   /* Could be range if \E follows */
4386f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
4387f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4388f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* Handle POSIX class names. Perl allows a negation extension of the
4389f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      form [:^name:]. A square bracket that doesn't match the syntax is
4390f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      treated as a literal. We also recognize the POSIX constructions
4391f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
4392f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      5.6 and 5.8 do. */
4393f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4394f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (c == CHAR_LEFT_SQUARE_BRACKET &&
4395f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
4396f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich           ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
4397f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
4398f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        BOOL local_negate = FALSE;
4399f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        int posix_class, taboffset, tabopt;
440053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        register const uint8_t *cbits = cb->cbits;
440153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        uint8_t pbits[32];
4402f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4403f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (ptr[1] != CHAR_COLON)
4404f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
440553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          *errorcodeptr = ERR13;
4406f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          goto FAILED;
4407f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
4408f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4409f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        ptr += 2;
4410f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
4411f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
4412f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          local_negate = TRUE;
4413f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          should_flip_negation = TRUE;  /* Note negative special */
4414f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ptr++;
4415f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
4416f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4417f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        posix_class = check_posix_name(ptr, (int)(tempptr - ptr));
4418f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (posix_class < 0)
4419f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
4420f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *errorcodeptr = ERR30;
4421f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          goto FAILED;
4422f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
4423f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4424f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* If matching is caseless, upper and lower are converted to
4425f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        alpha. This relies on the fact that the class table starts with
4426f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        alpha, lower, upper as the first 3 entries. */
4427f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
442853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2)
4429f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          posix_class = 0;
4430f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
443153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* When PCRE2_UCP is set, some of the POSIX classes are converted to
4432f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        different escape sequences that use Unicode properties \p or \P. Others
4433f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP
443453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        directly. UCP support is not available unless UTF support is. */
4435f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
443653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
443753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if ((options & PCRE2_UCP) != 0)
4438f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
4439f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          unsigned int ptype = 0;
4440f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);
4441f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4442f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* The posix_substitutes table specifies which POSIX classes can be
444353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          converted to \p or \P items. This can only happen at top nesting
444453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          level, as there will never be a POSIX class in a string that is
444553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          substituted for something else. */
4446f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4447f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (posix_substitutes[pc] != NULL)
4448f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
444953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            cb->nestptr[0] = tempptr + 1;
4450f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            ptr = posix_substitutes[pc] - 1;
445153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            goto CONTINUE_CLASS;
4452f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
4453f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4454f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* There are three other classes that generate special property calls
4455f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          that are recognized only in an XCLASS. */
4456f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4457f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          else switch(posix_class)
4458f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
4459f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case PC_GRAPH:
4460f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            ptype = PT_PXGRAPH;
4461f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            /* Fall through */
4462f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case PC_PRINT:
4463f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (ptype == 0) ptype = PT_PXPRINT;
4464f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            /* Fall through */
4465f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case PC_PUNCT:
4466f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (ptype == 0) ptype = PT_PXPUNCT;
4467f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
44688b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis            *class_uchardata++ = (PCRE2_UCHAR)ptype;
4469f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *class_uchardata++ = 0;
4470f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            xclass_has_prop = TRUE;
4471f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            ptr = tempptr + 1;
447253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            goto CONTINUE_CLASS;
4473f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
44740ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes            /* For the other POSIX classes (ascii, xdigit) we are going to fall
44750ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes            through to the non-UCP case and build a bit map for characters with
447653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            code points less than 256. However, if we are in a negated POSIX
447753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            class, characters with code points greater than 255 must either all
447853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            match or all not match, depending on whether the whole class is not
447953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            or is negated. For example, for [[:^ascii:]... they must all match,
448053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            whereas for [^[:^xdigit:]... they must not.
448153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
448253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            In the special case where there are no xclass items, this is
448353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            automatically handled by the use of OP_CLASS or OP_NCLASS, but an
448453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            explicit range is needed for OP_XCLASS. Setting a flag here causes
448553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            the range to be generated later when it is known that OP_XCLASS is
448653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            required. */
4487f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4488f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            default:
448953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            match_all_or_no_wide_chars |= local_negate;
4490f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            break;
4491f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
4492f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
449353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif  /* SUPPORT_UNICODE */
449453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
4495f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* In the non-UCP case, or when UCP makes no difference, we build the
4496f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        bit map for the POSIX class in a chunk of local store because we may be
4497f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        adding and subtracting from it, and we don't want to subtract bits that
4498f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        may be in the main map already. At the end we or the result into the
4499f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        bit map that is being built. */
4500f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4501f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        posix_class *= 3;
4502f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4503f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Copy in the first table (always present) */
4504f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4505f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        memcpy(pbits, cbits + posix_class_maps[posix_class],
450653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          32 * sizeof(uint8_t));
4507f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4508f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* If there is a second table, add or remove it as required. */
4509f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4510f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        taboffset = posix_class_maps[posix_class + 1];
4511f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        tabopt = posix_class_maps[posix_class + 2];
4512f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4513f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (taboffset >= 0)
4514f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
4515f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (tabopt >= 0)
45168b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis            for (c = 0; c < 32; c++) pbits[c] |= cbits[(int)c + taboffset];
4517f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          else
45188b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis            for (c = 0; c < 32; c++) pbits[c] &= ~cbits[(int)c + taboffset];
4519f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
4520f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4521f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Now see if we need to remove any special characters. An option
4522f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        value of 1 removes vertical space and 2 removes underscore. */
4523f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4524f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (tabopt < 0) tabopt = -tabopt;
4525f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (tabopt == 1) pbits[1] &= ~0x3c;
4526f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          else if (tabopt == 2) pbits[11] &= 0x7f;
4527f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4528f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Add the POSIX table or its complement into the main table that is
4529f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        being built and we are done. */
4530f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4531f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (local_negate)
4532f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c];
4533f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else
4534f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
4535f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4536f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        ptr = tempptr + 1;
4537f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Every class contains at least one < 256 character. */
4538f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        class_has_8bitchar = 1;
4539f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Every class contains at least two characters. */
4540f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        class_one_char = 2;
454153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto CONTINUE_CLASS;    /* End of POSIX syntax handling */
4542f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
4543f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4544f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* Backslash may introduce a single character, or it may introduce one
4545f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      of the specials, which just set a flag. The sequence \b is a special
4546f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      case. Inside a class (and only there) it is treated as backspace. We
4547f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      assume that other escapes have more than one character in them, so
4548f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      speculatively set both class_has_8bitchar and class_one_char bigger
454953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      than one. Unrecognized escapes fall through and are faulted. */
4550f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4551f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (c == CHAR_BACKSLASH)
4552f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
455353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        escape = PRIV(check_escape)(&ptr, cb->end_pattern, &ec, errorcodeptr,
455453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          options, TRUE, cb);
4555f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (*errorcodeptr != 0) goto FAILED;
455653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (escape == 0)    /* Escaped single char */
455753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
455853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          c = ec;
455953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef EBCDIC
456053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          range_is_literal = FALSE;
456153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
456253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
4563f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else if (escape == ESC_b) c = CHAR_BS; /* \b is backspace in a class */
4564f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else if (escape == ESC_N)          /* \N is not supported in a class */
4565f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
4566f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *errorcodeptr = ERR71;
4567f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          goto FAILED;
4568f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
4569f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else if (escape == ESC_Q)            /* Handle start of quoted string */
4570f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
4571f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
4572f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
4573f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            ptr += 2; /* avoid empty string */
4574f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
4575f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          else inescq = TRUE;
457653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          goto CONTINUE_CLASS;
4577f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
457853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else if (escape == ESC_E) goto CONTINUE_CLASS;  /* Ignore orphan \E */
4579f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
458053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else  /* Handle \d-type escapes */
4581f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
458253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          register const uint8_t *cbits = cb->cbits;
4583f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* Every class contains at least two < 256 characters. */
4584f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          class_has_8bitchar++;
4585f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* Every class contains at least two characters. */
4586f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          class_one_char += 2;
4587f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4588f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          switch (escape)
4589f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
459053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
4591f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_du:     /* These are the values given for \d etc */
459253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            case ESC_DU:     /* when PCRE2_UCP is set. We replace the */
4593f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_wu:     /* escape sequence with an appropriate \p */
4594f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_WU:     /* or \P to test Unicode properties instead */
459553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            case ESC_su:     /* of the default ASCII testing. This might be */
459653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            case ESC_SU:     /* a 2nd-level nesting for [[:<:]] or [[:>:]]. */
459753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            cb->nestptr[1] = cb->nestptr[0];
459853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            cb->nestptr[0] = ptr;
4599f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            ptr = substitutes[escape - ESC_DU] - 1;  /* Just before substitute */
4600f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            class_has_8bitchar--;                /* Undo! */
460153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
4602f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
4603f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_d:
4604f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit];
460553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
4606f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4607f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_D:
4608f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            should_flip_negation = TRUE;
4609f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
461053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
4611f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4612f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_w:
4613f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word];
461453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
4615f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4616f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_W:
4617f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            should_flip_negation = TRUE;
4618f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
461953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
4620f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4621f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
4622f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
4623f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            previously set by something earlier in the character class.
4624f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so
4625f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            we could just adjust the appropriate bit. From PCRE 8.34 we no
4626f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            longer treat \s and \S specially. */
4627f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4628f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_s:
4629f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space];
463053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
4631f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4632f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_S:
4633f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            should_flip_negation = TRUE;
4634f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
463553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
4636f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4637f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            /* The rest apply in both UCP and non-UCP cases. */
4638f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4639f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_h:
464053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            (void)add_list_to_class(classbits, &class_uchardata, options, cb,
4641f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              PRIV(hspace_list), NOTACHAR);
464253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
4643f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4644f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_H:
4645f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            (void)add_not_list_to_class(classbits, &class_uchardata, options,
464653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              cb, PRIV(hspace_list));
464753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
4648f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4649f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_v:
465053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            (void)add_list_to_class(classbits, &class_uchardata, options, cb,
4651f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              PRIV(vspace_list), NOTACHAR);
465253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
4653f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4654f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_V:
4655f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            (void)add_not_list_to_class(classbits, &class_uchardata, options,
465653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              cb, PRIV(vspace_list));
465753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
4658f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4659f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_p:
4660f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case ESC_P:
466153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
4662f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
4663f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              BOOL negated;
4664f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              unsigned int ptype = 0, pdata = 0;
466553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr, cb))
4666f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                goto FAILED;
4667f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *class_uchardata++ = ((escape == ESC_p) != negated)?
4668f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                XCL_PROP : XCL_NOTPROP;
4669f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *class_uchardata++ = ptype;
4670f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *class_uchardata++ = pdata;
4671f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              xclass_has_prop = TRUE;
4672f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              class_has_8bitchar--;                /* Undo! */
4673f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
467453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            break;
46750ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes#else
46760ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes            *errorcodeptr = ERR45;
46770ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes            goto FAILED;
4678f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
467953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            /* Unrecognized escapes are faulted. */
4680f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4681f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            default:
468253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *errorcodeptr = ERR7;
468353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            goto FAILED;
4684f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
468553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
468653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          /* Handled \d-type escape */
468753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
468853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          goto CONTINUE_CLASS;
4689f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
4690f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
469153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* Control gets here if the escape just defined a single character.
469253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        This is in c and may be greater than 256. */
4693f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4694f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        escape = 0;
4695f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }   /* End of backslash handling */
4696f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4697f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* A character may be followed by '-' to form a range. However, Perl does
4698f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      not permit ']' to be the end of the range. A '-' character at the end is
4699f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      treated as a literal. Perl ignores orphaned \E sequences entirely. The
4700f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      code for handling \Q and \E is messy. */
4701f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4702f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      CHECK_RANGE:
4703f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
4704f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
4705f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        inescq = FALSE;
4706f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        ptr += 2;
4707f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
4708f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      oldptr = ptr;
4709f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4710f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* Remember if \r or \n were explicitly used */
4711f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
471253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
4713f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4714f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* Check for range */
4715f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4716f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (!inescq && ptr[1] == CHAR_MINUS)
4717f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
471853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        uint32_t d;
4719f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        ptr += 2;
4720f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
4721f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4722f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* If we hit \Q (not followed by \E) at this point, go into escaped
4723f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        mode. */
4724f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4725f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
4726f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
4727f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ptr += 2;
4728f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
4729f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            { ptr += 2; continue; }
4730f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          inescq = TRUE;
4731f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          break;
4732f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
4733f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4734f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Minus (hyphen) at the end of a class is treated as a literal, so put
4735f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        back the pointer and jump to handle the character that preceded it. */
4736f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4737f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (*ptr == CHAR_NULL || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
4738f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
4739f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ptr = oldptr;
4740f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          goto CLASS_SINGLE_CHARACTER;
4741f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
4742f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4743f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Otherwise, we have a potential range; pick up the next character */
4744f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
474553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
4746f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (utf)
4747f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {                           /* Braces are required because the */
4748f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          GETCHARLEN(d, ptr, ptr);    /* macro generates multiple statements */
4749f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
4750f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else
4751f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
475253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        d = *ptr;  /* Not UTF mode */
4753f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4754f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* The second part of a range can be a single-character escape
4755f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        sequence, but not any of the other escapes. Perl treats a hyphen as a
4756f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        literal in such circumstances. However, in Perl's warning mode, a
4757f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        warning is given, so PCRE now faults it as it is almost certainly a
4758f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        mistake on the user's part. */
4759f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4760f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (!inescq)
4761f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
4762f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (d == CHAR_BACKSLASH)
4763f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
4764f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            int descape;
476553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            descape = PRIV(check_escape)(&ptr, cb->end_pattern, &d,
476653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              errorcodeptr, options, TRUE, cb);
4767f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (*errorcodeptr != 0) goto FAILED;
476853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef EBCDIC
476953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            range_is_literal = FALSE;
477053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
4771f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            /* 0 means a character was put into d; \b is backspace; any other
4772f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            special causes an error. */
4773f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4774f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (descape != 0)
4775f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
4776f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              if (descape == ESC_b) d = CHAR_BS; else
4777f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                {
477853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                *errorcodeptr = ERR50;
4779f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                goto FAILED;
4780f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                }
4781f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
4782f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
4783f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4784f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* A hyphen followed by a POSIX class is treated in the same way. */
4785f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4786f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          else if (d == CHAR_LEFT_SQUARE_BRACKET &&
4787f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                   (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
4788f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                    ptr[1] == CHAR_EQUALS_SIGN) &&
4789f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                   check_posix_syntax(ptr, &tempptr))
4790f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
479153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *errorcodeptr = ERR50;
4792f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            goto FAILED;
4793f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
4794f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
4795f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4796f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Check that the two values are in the correct order. Optimize
4797f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        one-character ranges. */
4798f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4799f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (d < c)
4800f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
4801f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *errorcodeptr = ERR8;
4802f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          goto FAILED;
4803f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
4804f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (d == c) goto CLASS_SINGLE_CHARACTER;  /* A few lines below */
4805f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4806f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* We have found a character range, so single character optimizations
4807f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        cannot be done anymore. Any value greater than 1 indicates that there
4808f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        is more than one character. */
4809f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4810f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        class_one_char = 2;
4811f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4812f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Remember an explicit \r or \n, and add the range to the class. */
4813f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
481453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
4815f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
481653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* In an EBCDIC environment, Perl treats alphabetic ranges specially
481753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        because there are holes in the encoding, and simply using the range A-Z
481853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        (for example) would include the characters in the holes. This applies
481953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */
482053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
482153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef EBCDIC
482253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (range_is_literal &&
482353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis             (cb->ctypes[c] & ctype_letter) != 0 &&
482453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis             (cb->ctypes[d] & ctype_letter) != 0 &&
482553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis             (c <= CHAR_z) == (d <= CHAR_z))
482653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
482753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          uint32_t uc = (c <= CHAR_z)? 0 : 64;
482853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          uint32_t C = c - uc;
482953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          uint32_t D = d - uc;
483053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
483153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (C <= CHAR_i)
483253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
483353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            class_has_8bitchar +=
483453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              add_to_class(classbits, &class_uchardata, options, cb, C + uc,
483553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                ((D < CHAR_i)? D : CHAR_i) + uc);
483653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            C = CHAR_j;
483753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
483853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
483953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (C <= D && C <= CHAR_r)
484053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
484153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            class_has_8bitchar +=
484253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              add_to_class(classbits, &class_uchardata, options, cb, C + uc,
484353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                ((D < CHAR_r)? D : CHAR_r) + uc);
484453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            C = CHAR_s;
484553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
4846f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
484753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (C <= D)
484853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
484953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            class_has_8bitchar +=
485053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              add_to_class(classbits, &class_uchardata, options, cb, C + uc,
485153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                D + uc);
485253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
485353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
485453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else
485553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
485653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        class_has_8bitchar +=
485753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          add_to_class(classbits, &class_uchardata, options, cb, c, d);
485853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto CONTINUE_CLASS;   /* Go get the next char in the class */
4859f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
4860f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4861f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* Handle a single character - we can get here for a normal non-escape
4862f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      char, or after \ that introduces a single character or for an apparent
4863f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      range that isn't. Only the value 1 matters for class_one_char, so don't
4864f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      increase it if it is already 2 or more ... just in case there's a class
4865f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      with a zillion characters in it. */
4866f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4867f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      CLASS_SINGLE_CHARACTER:
4868f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (class_one_char < 2) class_one_char++;
4869f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
487053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* If class_one_char is 1 and xclass_has_prop is false, we have the first
48710ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes      single character in the class, and there have been no prior ranges, or
48720ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes      XCLASS items generated by escapes. If this is the final character in the
48730ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes      class, we can optimize by turning the item into a 1-character OP_CHAR[I]
48740ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes      if it's positive, or OP_NOT[I] if it's negative. In the positive case, it
487553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      can cause firstcu to be set. Otherwise, there can be no first char if
48760ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes      this item is first, whatever repeat count may follow. In the case of
487753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      reqcu, save the previous value for reinstating. */
4878f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
48790ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes      if (!inescq &&
488053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
48810ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes          !xclass_has_prop &&
48820ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes#endif
48830ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes          class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
4884f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
4885f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        ptr++;
488653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        zeroreqcu = reqcu;
488753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        zeroreqcuflags = reqcuflags;
4888f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4889f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (negate_class)
4890f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
489153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
4892f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          int d;
4893f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
489453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
489553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          zerofirstcu = firstcu;
489653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          zerofirstcuflags = firstcuflags;
4897f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
489853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          /* For caseless UTF mode, check whether this character has more than
489953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          one other case. If so, generate a special OP_NOTPROP item instead of
490053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          OP_NOTI. */
4901f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
490253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
490353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (utf && (options & PCRE2_CASELESS) != 0 &&
4904f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              (d = UCD_CASESET(c)) != 0)
4905f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
4906f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *code++ = OP_NOTPROP;
4907f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *code++ = PT_CLIST;
4908f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *code++ = d;
4909f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
4910f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          else
4911f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
4912f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* Char has only one other case, or UCP not available */
4913f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4914f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
491553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *code++ = ((options & PCRE2_CASELESS) != 0)? OP_NOTI: OP_NOT;
491653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            code += PUTCHAR(c, code);
4917f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
4918f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4919f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* We are finished with this character class */
4920f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4921f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          goto END_CLASS;
4922f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
4923f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4924f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* For a single, positive character, get the value into mcbuffer, and
4925f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        then we can handle this with the normal one-character code. */
4926f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
492753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        mclength = PUTCHAR(c, mcbuffer);
4928f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        goto ONE_CHAR;
4929f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }       /* End of 1-char optimization */
4930f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4931f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* There is more than one character in the class, or an XCLASS item
4932f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      has been generated. Add this character to the class. */
4933f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4934f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      class_has_8bitchar +=
493553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        add_to_class(classbits, &class_uchardata, options, cb, c, c);
4936f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
493753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Continue to the next character in the class. Closing square bracket
493853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      not within \Q..\E ends the class. A NULL character terminates a
493953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      nested substitution string, but may be a data character in the main
494053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      pattern (tested at the start of this loop). */
4941f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
494253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      CONTINUE_CLASS:
494353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c = *(++ptr);
494453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (c == CHAR_NULL && cb->nestptr[0] != NULL)
494553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
494653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr = cb->nestptr[0];
494753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cb->nestptr[0] = cb->nestptr[1];
494853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cb->nestptr[1] = NULL;
494953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        c = *(++ptr);
495053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
4951f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
495253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS
495353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* If any wide characters have been encountered, set xclass = TRUE. Then,
495453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      in the pre-compile phase, accumulate the length of the wide characters
495553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      and reset the pointer. This is so that very large classes that contain a
495653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      zillion wide characters do not overwrite the work space (which is on the
495753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      stack). */
4958f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
495953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (class_uchardata > class_uchardata_base)
496053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
496153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        xclass = TRUE;
496253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (lengthptr != NULL)
496353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
496453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          *lengthptr += class_uchardata - class_uchardata_base;
496553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          class_uchardata = class_uchardata_base;
496653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
496753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
4968f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
496953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* An unescaped ] ends the class */
497053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
497153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break;
497253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }   /* End of main class-processing loop */
4973f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
4974f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* If this is the first thing in the branch, there can be no first char
497553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    setting, whatever the repeat count. Any reqcu setting must remain
4976f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    unchanged after any kind of repeat. */
4977f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
497853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
497953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zerofirstcu = firstcu;
498053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zerofirstcuflags = firstcuflags;
498153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zeroreqcu = reqcu;
498253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zeroreqcuflags = reqcuflags;
498353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
498453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* If there are characters with values > 255, or Unicode property settings
498553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    (\p or \P), we have to compile an extended class, with its own opcode,
498653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    unless there were no property settings and there was a negated special such
498753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    as \S in the class, and PCRE2_UCP is not set, because in that case all
498853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    characters > 255 are in or not in the class, so any that were explicitly
498953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    given as well can be ignored.
499053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
499153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    In the UCP case, if certain negated POSIX classes ([:^ascii:] or
499253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    [:^xdigit:]) were present in a class, we either have to match or not match
499353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    all wide characters (depending on whether the whole class is or is not
499453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    negated). This requirement is indicated by match_all_or_no_wide_chars being
499553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    true. We do this by including an explicit range, which works in both cases.
499653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
499753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    If, when generating an xclass, there are no characters < 256, we can omit
499853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    the bitmap in the actual compiled code. */
499953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
500053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS
500153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
50020ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes    if (xclass && (xclass_has_prop || !should_flip_negation ||
500353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis         (options & PCRE2_UCP) != 0))
500453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#elif PCRE2_CODE_UNIT_WIDTH != 8
50050ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes    if (xclass && (xclass_has_prop || !should_flip_negation))
5006f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
5007f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
500853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (match_all_or_no_wide_chars)
500953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
501053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        *class_uchardata++ = XCL_RANGE;
501153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
501253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        class_uchardata += PRIV(ord2utf)(MAX_UTF_CODE_POINT, class_uchardata);
501353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
5014f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *class_uchardata++ = XCL_END;    /* Marks the end of extra data */
5015f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *code++ = OP_XCLASS;
5016f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      code += LINK_SIZE;
5017f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *code = negate_class? XCL_NOT:0;
5018f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (xclass_has_prop) *code |= XCL_HASPROP;
5019f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5020f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* If the map is required, move up the extra data to make room for it;
5021f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      otherwise just move the code pointer to the end of the extra data. */
5022f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5023f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (class_has_8bitchar > 0)
5024f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5025f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *code++ |= XCL_MAP;
502653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
502753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          CU2BYTES(class_uchardata - code));
5028f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (negate_class && !xclass_has_prop)
5029f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
5030f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        memcpy(code, classbits, 32);
503153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
5032f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5033f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else code = class_uchardata;
5034f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* Now fill in the complete length of the item */
5036f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5037f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      PUT(previous, 1, (int)(code - previous));
5038f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      break;   /* End of class handling */
5039f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
5040f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
5041f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5042f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* If there are no characters > 255, or they are all to be included or
5043f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
5044f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    whole class was negated and whether there were negative specials such as \S
5045f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    (non-UCP) in the class. Then copy the 32-byte map into the code vector,
5046f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    negating it if necessary. */
5047f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5048f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
5049f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (lengthptr == NULL)    /* Save time in the pre-compile phase */
5050f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
5051f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (negate_class)
5052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        for (c = 0; c < 32; c++) classbits[c] = ~classbits[c];
5053f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      memcpy(code, classbits, 32);
5054f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
505553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    code += 32 / sizeof(PCRE2_UCHAR);
5056f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5057f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    END_CLASS:
5058f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
5059f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5060f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5061f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* ===================================================================*/
5062f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
5063f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    has been tested above. */
5064f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5065f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case CHAR_LEFT_CURLY_BRACKET:
5066f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (!is_quantifier) goto NORMAL_CHAR;
5067f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
5068f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (*errorcodeptr != 0) goto FAILED;
5069f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    goto REPEAT;
5070f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5071f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case CHAR_ASTERISK:
5072f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    repeat_min = 0;
5073f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    repeat_max = -1;
5074f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    goto REPEAT;
5075f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5076f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case CHAR_PLUS:
5077f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    repeat_min = 1;
5078f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    repeat_max = -1;
5079f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    goto REPEAT;
5080f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5081f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case CHAR_QUESTION_MARK:
5082f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    repeat_min = 0;
5083f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    repeat_max = 1;
5084f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5085f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    REPEAT:
5086f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (previous == NULL)
5087f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
5088f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *errorcodeptr = ERR9;
5089f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      goto FAILED;
5090f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
5091f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5092f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (repeat_min == 0)
5093f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
509453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      firstcu = zerofirstcu;    /* Adjust for zero repeat */
509553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      firstcuflags = zerofirstcuflags;
509653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      reqcu = zeroreqcu;        /* Ditto */
509753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      reqcuflags = zeroreqcuflags;
5098f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
5099f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5100f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Remember whether this is a variable length repeat */
5101f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5102f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
5103f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5104f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    op_type = 0;                    /* Default single-char op codes */
5105f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    possessive_quantifier = FALSE;  /* Default not possessive quantifier */
5106f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5107f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Save start of previous item, in case we have to move it up in order to
5108f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    insert something before it. */
5109f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5110f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    tempcode = previous;
5111f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5112f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Before checking for a possessive quantifier, we must skip over
5113f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    whitespace and comments in extended mode because Perl allows white space at
5114f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    this point. */
5115f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
511653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if ((options & PCRE2_EXTENDED) != 0)
5117f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
511853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr++;
5119f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      for (;;)
5120f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
512153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        while (MAX_255(*ptr) && (cb->ctypes[*ptr] & ctype_space) != 0) ptr++;
512253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (*ptr != CHAR_NUMBER_SIGN) break;
512353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr++;
512453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        while (ptr < cb->end_pattern)
5125f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
512653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (IS_NEWLINE(ptr))         /* For non-fixed-length newline cases, */
512753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {                        /* IS_NEWLINE sets cb->nllen. */
512853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            ptr += cb->nllen;
5129f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            break;
5130f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
513153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr++;
513253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
513353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (utf) FORWARDCHAR(ptr);
5134f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
5135f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }           /* Loop for comment characters */
5136f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }             /* Loop for multiple comments */
513753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ptr--;          /* Last code unit of previous character. */
5138f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
5139f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5140f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* If the next character is '+', we have a possessive quantifier. This
514153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    implies greediness, whatever the setting of the PCRE2_UNGREEDY option.
5142f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    If the next character is '?' this is a minimizing repeat, by default,
514353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    but if PCRE2_UNGREEDY is set, it works the other way round. We change the
5144f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    repeat type to the non-default. */
5145f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5146f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (ptr[1] == CHAR_PLUS)
5147f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
5148f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      repeat_type = 0;                  /* Force greedy */
5149f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      possessive_quantifier = TRUE;
5150f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      ptr++;
5151f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
5152f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    else if (ptr[1] == CHAR_QUESTION_MARK)
5153f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
5154f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      repeat_type = greedy_non_default;
5155f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      ptr++;
5156f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
5157f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    else repeat_type = greedy_default;
5158f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
515953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* If the repeat is {1} we can ignore it. */
516053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
516153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
516253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
5163f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* If previous was a recursion call, wrap it in atomic brackets so that
5164f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    previous becomes the atomic group. All recursions were so wrapped in the
5165f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    past, but it no longer happens for non-repeated recursions. In fact, the
5166f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    repeated ones could be re-implemented independently so as not to need this,
5167f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    but for the moment we rely on the code for repeating groups. */
5168f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5169f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (*previous == OP_RECURSE)
5170f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
517153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
5172f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *previous = OP_ONCE;
5173f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      PUT(previous, 1, 2 + 2*LINK_SIZE);
5174f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      previous[2 + 2*LINK_SIZE] = OP_KET;
5175f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE);
5176f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      code += 2 + 2 * LINK_SIZE;
5177f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      length_prevgroup = 3 + 3*LINK_SIZE;
5178f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
5179f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5180f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Now handle repetition for the different types of item. */
5181f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5182f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* If previous was a character or negated character match, abolish the item
5183f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    and generate a repeat item instead. If a char item has a minimum of more
518453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    than one, ensure that it is set in reqcu - it might not be if a sequence
5185f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    such as x{3} is the first thing in a branch because the x will have gone
518653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    into firstcu instead.  */
5187f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5188f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (*previous == OP_CHAR || *previous == OP_CHARI
5189f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        || *previous == OP_NOT || *previous == OP_NOTI)
5190f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
5191f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      switch (*previous)
5192f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5193f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        default: /* Make compiler happy. */
5194f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_CHAR:  op_type = OP_STAR - OP_STAR; break;
5195f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_CHARI: op_type = OP_STARI - OP_STAR; break;
5196f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_NOT:   op_type = OP_NOTSTAR - OP_STAR; break;
5197f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_NOTI:  op_type = OP_NOTSTARI - OP_STAR; break;
5198f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5199f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
520053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Deal with UTF characters that take up more than one code unit. It's
5201f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      easier to write this out separately than try to macrify it. Use c to
520253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      hold the length of the character in code units, plus UTF_LENGTH to flag
520353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      that it's a length rather than a small character. */
5204f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
520553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef MAYBE_UTF_MULTI
520653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (utf && NOT_FIRSTCU(code[-1]))
5207f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
520853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        PCRE2_UCHAR *lastchar = code - 1;
5209f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        BACKCHAR(lastchar);
521053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        c = (int)(code - lastchar);               /* Length of UTF character */
521153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        memcpy(utf_units, lastchar, CU2BYTES(c)); /* Save the char */
521253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        c |= UTF_LENGTH;                          /* Flag c as a length */
5213f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5214f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else
521553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif  /* MAYBE_UTF_MULTI */
5216f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5217f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* Handle the case of a single character - either with no UTF support, or
521853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      with UTF disabled, or for a single-code-unit UTF character. */
5219f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5220f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        c = code[-1];
5221f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (*previous <= OP_CHARI && repeat_min > 1)
5222f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
522353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          reqcu = c;
522453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          reqcuflags = req_caseopt | cb->req_varyopt;
5225f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
5226f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5227f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5228f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      goto OUTPUT_SINGLE_REPEAT;   /* Code shared with single character types */
5229f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
5230f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5231f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* If previous was a character type match (\d or similar), abolish it and
5232f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    create a suitable repeat item. The code is shared with single-character
5233f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    repeats by setting op_type to add a suitable offset into repeat_type. Note
523453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    that the Unicode property types will be present only when SUPPORT_UNICODE is
5235f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    defined, but we don't wrap the little bits of code here because it just
5236f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    makes it horribly messy. */
5237f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5238f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    else if (*previous < OP_EODN)
5239f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
524053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_UCHAR *oldcode;
5241f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      int prop_type, prop_value;
524253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      op_type = OP_TYPESTAR - OP_STAR;      /* Use type opcodes */
524353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      c = *previous;                        /* Save previous opcode */
524453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (c == OP_PROP || c == OP_NOTPROP)
5245f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5246f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        prop_type = previous[1];
5247f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        prop_value = previous[2];
5248f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
524953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else
525053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
525153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* Come here from just above with a character in c */
525253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        OUTPUT_SINGLE_REPEAT:
525353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        prop_type = prop_value = -1;
525453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
5255f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
525653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* At this point we either have prop_type == prop_value == -1 and either
525753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      a code point or a character type that is not OP_[NOT]PROP in c, or we
525853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      have OP_[NOT]PROP in c and prop_type/prop_value not negative. */
525953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
526053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      oldcode = code;                   /* Save where we were */
5261f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      code = previous;                  /* Usually overwrite previous item */
5262f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5263f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* If the maximum is zero then the minimum must also be zero; Perl allows
5264f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      this case, so we do too - by simply omitting the item altogether. */
5265f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5266f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (repeat_max == 0) goto END_REPEAT;
5267f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5268f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* Combine the op_type with the repeat_type */
5269f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5270f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      repeat_type += op_type;
5271f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5272f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* A minimum of zero is handled either as the special case * or ?, or as
5273f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      an UPTO, with the maximum given. */
5274f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5275f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (repeat_min == 0)
5276f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5277f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
5278f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
5279f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else
5280f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
5281f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *code++ = OP_UPTO + repeat_type;
5282f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          PUT2INC(code, 0, repeat_max);
5283f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
5284f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5285f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5286f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* A repeat minimum of 1 is optimized into some special cases. If the
5287f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      maximum is unlimited, we use OP_PLUS. Otherwise, the original item is
5288f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      left in place and, if the maximum is greater than 1, we use OP_UPTO with
5289f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      one less than the maximum. */
5290f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5291f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else if (repeat_min == 1)
5292f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5293f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (repeat_max == -1)
5294f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *code++ = OP_PLUS + repeat_type;
5295f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else
5296f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
529753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          code = oldcode;                 /* Leave previous item in place */
5298f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (repeat_max == 1) goto END_REPEAT;
5299f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *code++ = OP_UPTO + repeat_type;
5300f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          PUT2INC(code, 0, repeat_max - 1);
5301f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
5302f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5303f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5304f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* The case {n,n} is just an EXACT, while the general case {n,m} is
530553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      handled as an EXACT followed by an UPTO or STAR or QUERY. */
5306f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5307f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else
5308f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5309f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *code++ = OP_EXACT + op_type;  /* NB EXACT doesn't have repeat_type */
5310f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        PUT2INC(code, 0, repeat_min);
5311f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
531253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* Unless repeat_max equals repeat_min, fill in the data for EXACT, and
531353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        then generate the second opcode. In UTF mode, multi-code-unit
531453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        characters have their length in c, with the UTF_LENGTH bit as a flag,
531553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        and the code units in utf_units. For a repeated Unicode property match,
531653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        there are two extra values that define the required property, and c
531753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        never has the UTF_LENGTH bit set. */
5318f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
531953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (repeat_max != repeat_min)
5320f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
532153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef MAYBE_UTF_MULTI
5322f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (utf && (c & UTF_LENGTH) != 0)
5323f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
532453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            memcpy(code, utf_units, CU2BYTES(c & 7));
5325f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            code += c & 7;
5326f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
5327f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          else
532853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif  /* MAYBE_UTF_MULTI */
5329f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
5330f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *code++ = c;
5331f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (prop_type >= 0)
5332f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
5333f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *code++ = prop_type;
5334f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *code++ = prop_value;
5335f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
5336f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
5337f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
533853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          /* Now set up the following opcode */
5339f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
534053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (repeat_max < 0) *code++ = OP_STAR + repeat_type; else
5341f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
534253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            repeat_max -= repeat_min;
534353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (repeat_max == 1)
534453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              {
534553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              *code++ = OP_QUERY + repeat_type;
534653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              }
534753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            else
534853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              {
534953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              *code++ = OP_UPTO + repeat_type;
535053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              PUT2INC(code, 0, repeat_max);
535153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              }
5352f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
5353f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
5354f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5355f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
535653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Fill in the character or character type for the final opcode. */
5357f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
535853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef MAYBE_UTF_MULTI
5359f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (utf && (c & UTF_LENGTH) != 0)
5360f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
536153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        memcpy(code, utf_units, CU2BYTES(c & 7));
5362f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        code += c & 7;
5363f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5364f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else
536553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif  /* MAYBE_UTF_MULTI */
5366f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
536753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        *code++ = c;
536853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (prop_type >= 0)
536953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
537053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          *code++ = prop_type;
537153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          *code++ = prop_value;
537253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
5373f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5374f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
5375f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5376f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* If previous was a character class or a back reference, we put the repeat
5377f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    stuff after it, but just skip the item if the repeat was {0,0}. */
5378f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5379f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    else if (*previous == OP_CLASS || *previous == OP_NCLASS ||
538053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS
5381f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich             *previous == OP_XCLASS ||
5382f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
5383f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich             *previous == OP_REF   || *previous == OP_REFI ||
5384f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich             *previous == OP_DNREF || *previous == OP_DNREFI)
5385f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
5386f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (repeat_max == 0)
5387f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5388f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        code = previous;
5389f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        goto END_REPEAT;
5390f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5391f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5392f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (repeat_min == 0 && repeat_max == -1)
5393f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *code++ = OP_CRSTAR + repeat_type;
5394f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else if (repeat_min == 1 && repeat_max == -1)
5395f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *code++ = OP_CRPLUS + repeat_type;
5396f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else if (repeat_min == 0 && repeat_max == 1)
5397f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *code++ = OP_CRQUERY + repeat_type;
5398f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else
5399f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5400f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *code++ = OP_CRRANGE + repeat_type;
5401f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        PUT2INC(code, 0, repeat_min);
5402f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (repeat_max == -1) repeat_max = 0;  /* 2-byte encoding for max */
5403f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        PUT2INC(code, 0, repeat_max);
5404f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5405f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
5406f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5407f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* If previous was a bracket group, we may have to replicate it in certain
5408f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    cases. Note that at this point we can encounter only the "basic" bracket
5409f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    opcodes such as BRA and CBRA, as this is the place where they get converted
5410f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    into the more special varieties such as BRAPOS and SBRA. A test for >=
5411f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    OP_ASSERT and <= OP_COND includes ASSERT, ASSERT_NOT, ASSERTBACK,
5412f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    ASSERTBACK_NOT, ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND.
5413f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    Originally, PCRE did not allow repetition of assertions, but now it does,
5414f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    for Perl compatibility. */
5415f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5416f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    else if (*previous >= OP_ASSERT && *previous <= OP_COND)
5417f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
5418f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      register int i;
5419f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      int len = (int)(code - previous);
542053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_UCHAR *bralink = NULL;
542153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_UCHAR *brazeroptr = NULL;
5422f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
542353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Repeating a DEFINE group (or any group where the condition is always
542453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      FALSE and there is only one branch) is pointless, but Perl allows the
542553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      syntax, so we just ignore the repeat. */
5426f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
542753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE &&
542853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          previous[GET(previous, 1)] != OP_ALT)
5429f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        goto END_REPEAT;
5430f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5431f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* There is no sense in actually repeating assertions. The only potential
5432f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      use of repetition is in cases when the assertion is optional. Therefore,
5433f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if the minimum is greater than zero, just ignore the repeat. If the
5434f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      maximum is not zero or one, set it to 1. */
5435f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5436f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (*previous < OP_ONCE)    /* Assertion */
5437f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5438f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (repeat_min > 0) goto END_REPEAT;
5439f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (repeat_max < 0 || repeat_max > 1) repeat_max = 1;
5440f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5441f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5442f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* The case of a zero minimum is special because of the need to stick
5443f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      OP_BRAZERO in front of it, and because the group appears once in the
5444f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      data, whereas in other cases it appears the minimum number of times. For
5445f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      this reason, it is simplest to treat this case separately, as otherwise
5446f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      the code gets far too messy. There are several special subcases when the
5447f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      minimum is zero. */
5448f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5449f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (repeat_min == 0)
5450f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5451f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* If the maximum is also zero, we used to just omit the group from the
5452f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        output altogether, like this:
5453f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5454f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        ** if (repeat_max == 0)
5455f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        **   {
5456f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        **   code = previous;
5457f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        **   goto END_REPEAT;
5458f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        **   }
5459f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5460f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        However, that fails when a group or a subgroup within it is referenced
5461f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        as a subroutine from elsewhere in the pattern, so now we stick in
5462f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        OP_SKIPZERO in front of it so that it is skipped on execution. As we
5463f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        don't have a list of which groups are referenced, we cannot do this
5464f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        selectively.
5465f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5466f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        If the maximum is 1 or unlimited, we just have to stick in the BRAZERO
546753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        and do no more at this point. */
5468f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5469f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
5470f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
547153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          memmove(previous + 1, previous, CU2BYTES(len));
5472f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          code++;
5473f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (repeat_max == 0)
5474f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
5475f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *previous++ = OP_SKIPZERO;
5476f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            goto END_REPEAT;
5477f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
5478f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          brazeroptr = previous;    /* Save for possessive optimizing */
5479f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *previous++ = OP_BRAZERO + repeat_type;
5480f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
5481f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5482f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* If the maximum is greater than 1 and limited, we have to replicate
5483f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        in a nested fashion, sticking OP_BRAZERO before each set of brackets.
5484f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        The first one has to be handled carefully because it's the original
5485f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        copy, which has to be moved up. The remainder can be handled by code
5486f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        that is common with the non-zero minimum case below. We have to
548753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        adjust the value of repeat_max, since one less copy is required. */
5488f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5489f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else
5490f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
5491f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          int offset;
549253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
5493f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          code += 2 + LINK_SIZE;
5494f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *previous++ = OP_BRAZERO + repeat_type;
5495f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *previous++ = OP_BRA;
5496f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5497f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* We chain together the bracket offset fields that have to be
5498f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          filled in later when the ends of the brackets are reached. */
5499f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5500f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          offset = (bralink == NULL)? 0 : (int)(previous - bralink);
5501f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          bralink = previous;
5502f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          PUTINC(previous, 0, offset);
5503f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
5504f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5505f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        repeat_max--;
5506f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5507f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5508f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* If the minimum is greater than zero, replicate the group as many
5509f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      times as necessary, and adjust the maximum to the number of subsequent
551053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      copies that we need. */
5511f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5512f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else
5513f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5514f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (repeat_min > 1)
5515f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
5516f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* In the pre-compile phase, we don't actually do the replication. We
5517f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          just adjust the length as if we had. Do some paranoid checks for
5518f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
5519f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          integer type when available, otherwise double. */
5520f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5521f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (lengthptr != NULL)
5522f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
552353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            size_t delta = (repeat_min - 1)*length_prevgroup;
5524f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if ((INT64_OR_DOUBLE)(repeat_min - 1)*
5525f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                  (INT64_OR_DOUBLE)length_prevgroup >
5526f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                    (INT64_OR_DOUBLE)INT_MAX ||
5527f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                OFLOW_MAX - *lengthptr < delta)
5528f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
5529f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *errorcodeptr = ERR20;
5530f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              goto FAILED;
5531f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
5532f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *lengthptr += delta;
5533f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
5534f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5535f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* This is compiling for real. If there is a set first byte for
553653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          the group, and we have not yet set a "required byte", set it. */
5537f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5538f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          else
5539f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
554053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (groupsetfirstcu && reqcuflags < 0)
5541f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
554253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              reqcu = firstcu;
554353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              reqcuflags = firstcuflags;
5544f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
5545f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            for (i = 1; i < repeat_min; i++)
5546f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
554753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              memcpy(code, previous, CU2BYTES(len));
5548f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              code += len;
5549f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
5550f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
5551f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
5552f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5553f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (repeat_max > 0) repeat_max -= repeat_min;
5554f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5555f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5556f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* This code is common to both the zero and non-zero minimum cases. If
5557f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      the maximum is limited, it replicates the group in a nested fashion,
5558f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      remembering the bracket starts on a stack. In the case of a zero minimum,
5559f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      the first one was set up above. In all cases the repeat_max now specifies
5560f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      the number of additional copies needed. Again, we must remember to
5561f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      replicate entries on the forward reference list. */
5562f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5563f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (repeat_max >= 0)
5564f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5565f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* In the pre-compile phase, we don't actually do the replication. We
5566f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        just adjust the length as if we had. For each repetition we must add 1
5567f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        to the length for BRAZERO and for all but the last repetition we must
5568f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
5569f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
5570f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        a 64-bit integer type when available, otherwise double. */
5571f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5572f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (lengthptr != NULL && repeat_max > 0)
5573f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
557453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          size_t delta = repeat_max*(length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
5575f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                      2 - 2*LINK_SIZE;   /* Last one doesn't nest */
5576f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if ((INT64_OR_DOUBLE)repeat_max *
5577f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
5578f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                  > (INT64_OR_DOUBLE)INT_MAX ||
5579f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              OFLOW_MAX - *lengthptr < delta)
5580f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
5581f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *errorcodeptr = ERR20;
5582f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            goto FAILED;
5583f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
5584f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *lengthptr += delta;
5585f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
5586f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5587f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* This is compiling for real */
5588f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5589f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else for (i = repeat_max - 1; i >= 0; i--)
5590f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
5591f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *code++ = OP_BRAZERO + repeat_type;
5592f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5593f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* All but the final copy start a new nesting, maintaining the
5594f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          chain of brackets outstanding. */
5595f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5596f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (i != 0)
5597f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
5598f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            int offset;
5599f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *code++ = OP_BRA;
5600f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            offset = (bralink == NULL)? 0 : (int)(code - bralink);
5601f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            bralink = code;
5602f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            PUTINC(code, 0, offset);
5603f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
5604f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
560553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          memcpy(code, previous, CU2BYTES(len));
5606f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          code += len;
5607f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
5608f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5609f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Now chain through the pending brackets, and fill in their length
5610f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        fields (which are holding the chain links pro tem). */
5611f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5612f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        while (bralink != NULL)
5613f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
5614f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          int oldlinkoffset;
5615f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          int offset = (int)(code - bralink + 1);
561653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          PCRE2_UCHAR *bra = code - offset;
5617f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          oldlinkoffset = GET(bra, 1);
5618f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
5619f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *code++ = OP_KET;
5620f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          PUTINC(code, 0, offset);
5621f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          PUT(bra, 1, offset);
5622f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
5623f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5624f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5625f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* If the maximum is unlimited, set a repeater in the final copy. For
5626f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      ONCE brackets, that's all we need to do. However, possessively repeated
5627f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      ONCE brackets can be converted into non-capturing brackets, as the
5628f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
5629f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      deal with possessive ONCEs specially.
5630f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5631f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      Otherwise, when we are doing the actual compile phase, check to see
5632f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      whether this group is one that could match an empty string. If so,
5633f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so
5634f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      that runtime checking can be done. [This check is also applied to ONCE
5635f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      groups at runtime, but in a different way.]
5636f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5637f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      Then, if the quantifier was possessive and the bracket is not a
5638f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      conditional, we convert the BRA code to the POS form, and the KET code to
5639f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      KETRPOS. (It turns out to be convenient at runtime to detect this kind of
5640f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      subpattern at both the start and at the end.) The use of special opcodes
564153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      makes it possible to reduce greatly the stack usage in pcre2_match(). If
5642f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
5643f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5644f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      Then, if the minimum number of matches is 1 or 0, cancel the possessive
5645f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      flag so that the default action below, of wrapping everything inside
5646f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      atomic brackets, does not happen. When the minimum is greater than 1,
5647f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      there will be earlier copies of the group, and so we still have to wrap
5648f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      the whole thing. */
5649f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5650f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else
5651f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
565253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        PCRE2_UCHAR *ketcode = code - 1 - LINK_SIZE;
565353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        PCRE2_UCHAR *bracode = ketcode - GET(ketcode, 1);
5654f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5655f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Convert possessive ONCE brackets to non-capturing */
5656f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5657f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) &&
5658f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            possessive_quantifier) *bracode = OP_BRA;
5659f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5660f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* For non-possessive ONCE brackets, all we need to do is to
5661f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        set the KET. */
5662f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5663f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC)
5664f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *ketcode = OP_KETRMAX + repeat_type;
5665f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5666f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Handle non-ONCE brackets and possessive ONCEs (which have been
5667f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        converted to non-capturing above). */
5668f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5669f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else
5670f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
567153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          /* In the compile phase, check whether the group could match an empty
567253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          string. */
5673f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5674f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (lengthptr == NULL)
5675f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
567653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            PCRE2_UCHAR *scode = bracode;
5677f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            do
5678f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
567953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              int count = 0;
568053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              int rc = could_be_empty_branch(scode, ketcode, utf, cb, FALSE,
568153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                NULL, &count);
568253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              if (rc < 0)
568353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                {
568453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                *errorcodeptr = ERR86;
568553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                goto FAILED;
568653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                }
568753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              if (rc > 0)
5688f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                {
5689f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                *bracode += OP_SBRA - OP_BRA;
5690f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                break;
5691f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                }
5692f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              scode += GET(scode, 1);
5693f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
5694f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            while (*scode == OP_ALT);
5695f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
569653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            /* A conditional group with only one branch has an implicit empty
569753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            alternative branch. */
56980ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
569953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (*bracode == OP_COND && bracode[GET(bracode,1)] != OP_ALT)
570053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              *bracode = OP_SCOND;
570153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
57020ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
5703f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* Handle possessive quantifiers. */
5704f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5705f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (possessive_quantifier)
5706f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
5707f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            /* For COND brackets, we wrap the whole thing in a possessively
5708f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            repeated non-capturing bracket, because we have not invented POS
570953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            versions of the COND opcodes. */
5710f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5711f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (*bracode == OP_COND || *bracode == OP_SCOND)
5712f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
5713f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              int nlen = (int)(code - bracode);
571453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
5715f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              code += 1 + LINK_SIZE;
5716f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              nlen += 1 + LINK_SIZE;
57170ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS;
5718f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *code++ = OP_KETRPOS;
5719f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              PUTINC(code, 0, nlen);
5720f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              PUT(bracode, 1, nlen);
5721f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
5722f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5723f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            /* For non-COND brackets, we modify the BRA code and use KETRPOS. */
5724f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5725f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            else
5726f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
5727f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *bracode += 1;              /* Switch to xxxPOS opcodes */
5728f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *ketcode = OP_KETRPOS;
5729f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
5730f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5731f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            /* If the minimum is zero, mark it as possessive, then unset the
5732f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            possessive flag when the minimum is 0 or 1. */
5733f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5734f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO;
5735f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (repeat_min < 2) possessive_quantifier = FALSE;
5736f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
5737f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5738f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* Non-possessive quantifier */
5739f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5740f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          else *ketcode = OP_KETRMAX + repeat_type;
5741f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
5742f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5743f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
5744f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
574553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* If previous is OP_FAIL, it was generated by an empty class []
574653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    (PCRE2_ALLOW_EMPTY_CLASS is set). The other ways in which OP_FAIL can be
574753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    generated, that is by (*FAIL) or (?!), set previous to NULL, which gives a
574853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    "nothing to repeat" error above. We can just ignore the repeat in the
574953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    empty class case. */
5750f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5751f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    else if (*previous == OP_FAIL) goto END_REPEAT;
5752f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5753f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Else there's some kind of shambles */
5754f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5755f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    else
5756f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
575753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *errorcodeptr = ERR10;
5758f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      goto FAILED;
5759f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
5760f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5761f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* If the character following a repeat is '+', possessive_quantifier is
5762f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    TRUE. For some opcodes, there are special alternative opcodes for this
5763f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case. For anything else, we wrap the entire repeated item inside OP_ONCE
5764f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    brackets. Logically, the '+' notation is just syntactic sugar, taken from
5765f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    Sun's Java package, but the special opcodes can optimize it.
5766f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5767f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    Some (but not all) possessively repeated subpatterns have already been
5768f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    completely handled in the code just above. For them, possessive_quantifier
5769f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    is always FALSE at this stage. Note that the repeated item starts at
5770f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    tempcode, not at previous, which might be the first part of a string whose
5771f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    (former) last char we repeated. */
5772f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5773f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (possessive_quantifier)
5774f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
5775f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      int len;
5776f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5777f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* Possessifying an EXACT quantifier has no effect, so we can ignore it.
5778f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      However, QUERY, STAR, or UPTO may follow (for quantifiers such as {5,6},
5779f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {5,}, or {5,10}). We skip over an EXACT item; if the length of what
5780f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      remains is greater than zero, there's a further opcode that can be
5781f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      handled. If not, do nothing, leaving the EXACT alone. */
5782f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5783f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      switch(*tempcode)
5784f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5785f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_TYPEEXACT:
5786f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        tempcode += PRIV(OP_lengths)[*tempcode] +
5787f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ((tempcode[1 + IMM2_SIZE] == OP_PROP
5788f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
5789f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        break;
5790f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5791f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* CHAR opcodes are used for exacts whose count is 1. */
5792f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5793f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_CHAR:
5794f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_CHARI:
5795f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_NOT:
5796f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_NOTI:
5797f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_EXACT:
5798f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_EXACTI:
5799f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_NOTEXACT:
5800f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_NOTEXACTI:
5801f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        tempcode += PRIV(OP_lengths)[*tempcode];
580253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
5803f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (utf && HAS_EXTRALEN(tempcode[-1]))
5804f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          tempcode += GET_EXTRALEN(tempcode[-1]);
5805f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
5806f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        break;
5807f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5808f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* For the class opcodes, the repeat operator appears at the end;
5809f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        adjust tempcode to point to it. */
5810f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5811f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_CLASS:
5812f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_NCLASS:
581353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        tempcode += 1 + 32/sizeof(PCRE2_UCHAR);
5814f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        break;
5815f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
581653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_WIDE_CHARS
5817f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case OP_XCLASS:
5818f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        tempcode += GET(tempcode, 1);
5819f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        break;
5820f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
5821f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5822f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5823f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* If tempcode is equal to code (which points to the end of the repeated
5824f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      item), it means we have skipped an EXACT item but there is no following
5825f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      QUERY, STAR, or UPTO; the value of len will be 0, and we do nothing. In
5826f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      all other cases, tempcode will be pointing to the repeat opcode, and will
5827f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      be less than code, so the value of len will be greater than 0. */
5828f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5829f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      len = (int)(code - tempcode);
5830f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (len > 0)
5831f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5832f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        unsigned int repcode = *tempcode;
5833f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5834f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* There is a table for possessifying opcodes, all of which are less
5835f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        than OP_CALLOUT. A zero entry means there is no possessified version.
5836f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        */
5837f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5838f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0)
5839f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *tempcode = opcode_possessify[repcode];
5840f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5841f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* For opcodes without a special possessified version, wrap the item in
584253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ONCE brackets. */
5843f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5844f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else
5845f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
584653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
5847f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          code += 1 + LINK_SIZE;
5848f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          len += 1 + LINK_SIZE;
5849f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          tempcode[0] = OP_ONCE;
5850f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *code++ = OP_KET;
5851f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          PUTINC(code, 0, len);
5852f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          PUT(tempcode, 1, len);
5853f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
5854f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5855f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
5856f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5857f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* In all cases we no longer have a previous item. We also set the
585853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    "follows varying string" flag for subsequently encountered reqcus if
5859f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    it isn't already set and we have just passed a varying length item. */
5860f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5861f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    END_REPEAT:
5862f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    previous = NULL;
586353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    cb->req_varyopt |= reqvary;
5864f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
5865f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5866f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5867f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* ===================================================================*/
586853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Start of nested parenthesized sub-expression, or lookahead or lookbehind
586953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    or option setting or condition or all the other extended parenthesis forms.
587053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    We must save the current high-water-mark for the forward reference list so
587153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    that we know where they start for this group. However, because the list may
587253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    be extended when there are very many forward references (usually the result
587353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    of a replicated inner group), we must use an offset rather than an absolute
587453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    address. Note that (?# comments are dealt with at the top of the loop;
587553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    they do not get this far. */
5876f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5877f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case CHAR_LEFT_PARENTHESIS:
58788366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes    ptr++;
5879f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
588053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Deal with various "verbs" that can be introduced by '*'. */
5881f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5882f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
588353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis         || (MAX_255(ptr[1]) && ((cb->ctypes[ptr[1]] & ctype_letter) != 0))))
5884f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
5885f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      int i, namelen;
5886f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      int arglen = 0;
5887f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      const char *vn = verbnames;
588853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_SPTR name = ptr + 1;
588953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_SPTR arg = NULL;
5890f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      previous = NULL;
5891f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      ptr++;
589253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
589353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Increment ptr, set namelen, check length */
589453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
589553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      READ_NAME(ctype_letter, ERR60, *errorcodeptr);
5896f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5897f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* It appears that Perl allows any characters whatsoever, other than
5898f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      a closing parenthesis, to appear in arguments, so we no longer insist on
589953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      letters, digits, and underscores. Perl does not, however, do any
590053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      interpretation within arguments, and has no means of including a closing
590153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      parenthesis. PCRE supports escape processing but only when it is
590253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      requested by an option. Note that check_escape() will not return values
590353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      greater than the code unit maximum when not in UTF mode. */
5904f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5905f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (*ptr == CHAR_COLON)
5906f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5907f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        arg = ++ptr;
590853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
590953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if ((options & PCRE2_ALT_VERBNAMES) == 0)
5910f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
591153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          arglen = 0;
591253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS)
591353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
591453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            ptr++;                                /* Check length as we go */
591553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            arglen++;                             /* along, to avoid the   */
591653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if ((unsigned int)arglen > MAX_MARK)  /* possibility of overflow. */
591753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              {
591853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              *errorcodeptr = ERR76;
591953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              goto FAILED;
592053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              }
592153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
592253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
592353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else
592453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
592553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          /* The length check is in process_verb_name() */
592653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          arglen = process_verb_name(&ptr, NULL, errorcodeptr, options,
592753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            utf, cb);
592853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (arglen < 0) goto FAILED;
5929f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
5930f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5931f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5932f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (*ptr != CHAR_RIGHT_PARENTHESIS)
5933f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5934f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *errorcodeptr = ERR60;
5935f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        goto FAILED;
5936f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
5937f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5938f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* Scan the table of verb names */
5939f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5940f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      for (i = 0; i < verbcount; i++)
5941f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
5942f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (namelen == verbs[i].len &&
594353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            PRIV(strncmp_c8)(name, vn, namelen) == 0)
5944f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
5945f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          int setverb;
5946f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5947f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* Check for open captures before ACCEPT and convert it to
5948f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ASSERT_ACCEPT if in an assertion. */
5949f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5950f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (verbs[i].op == OP_ACCEPT)
5951f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
5952f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            open_capitem *oc;
5953f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (arglen != 0)
5954f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
5955f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *errorcodeptr = ERR59;
5956f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              goto FAILED;
5957f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
595853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            cb->had_accept = TRUE;
59598b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
59608b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis            /* In the first pass, just accumulate the length required;
59618b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis            otherwise hitting (*ACCEPT) inside many nested parentheses can
59628b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis            cause workspace overflow. */
59638b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis
596453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            for (oc = cb->open_caps; oc != NULL; oc = oc->next)
5965f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
59668b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis              if (lengthptr != NULL)
59678b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis                {
59688b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis                *lengthptr += CU2BYTES(1) + IMM2_SIZE;
59698b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis                }
59708b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis              else
59718b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis                {
59728b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis                *code++ = OP_CLOSE;
59738b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis                PUT2INC(code, 0, oc->number);
59748b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis                }
5975f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
5976f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            setverb = *code++ =
597753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              (cb->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
5978f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
597953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            /* Do not set firstcu after *ACCEPT */
598053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
5981f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
5982f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
5983f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* Handle other cases with/without an argument */
5984f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
598553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          else if (arglen == 0)    /* There is no argument */
5986f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
5987f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (verbs[i].op < 0)   /* Argument is mandatory */
5988f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
5989f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *errorcodeptr = ERR66;
5990f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              goto FAILED;
5991f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
5992f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            setverb = *code++ = verbs[i].op;
5993f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
5994f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
599553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          else                        /* An argument is present */
5996f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
599753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (verbs[i].op_arg < 0)  /* Argument is forbidden */
5998f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
5999f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *errorcodeptr = ERR59;
6000f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              goto FAILED;
6001f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
6002f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            setverb = *code++ = verbs[i].op_arg;
600353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
600453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            /* Arguments can be very long, especially in 16- and 32-bit modes,
600553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            and can overflow the workspace in the first pass. Instead of
600653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            putting the argument into memory, we just update the length counter
600753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            and set up an empty argument. */
600853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
600953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (lengthptr != NULL)
601053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              {
601153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              *lengthptr += arglen;
60120ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              *code++ = 0;
60130ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              }
60140ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes            else
60150ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              {
60160ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              *code++ = arglen;
601753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              if ((options & PCRE2_ALT_VERBNAMES) != 0)
601853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                {
601953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                PCRE2_UCHAR *memcode = code;  /* code is "register" */
602053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                (void)process_verb_name(&arg, &memcode, errorcodeptr, options,
602153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                  utf, cb);
602253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                code = memcode;
602353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                }
602453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              else   /* No argument processing */
602553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                {
602653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                memcpy(code, arg, CU2BYTES(arglen));
602753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                code += arglen;
602853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                }
60290ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              }
603053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
6031f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *code++ = 0;
6032f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6033f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6034f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          switch (setverb)
6035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
6036f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case OP_THEN:
6037f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case OP_THEN_ARG:
603853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            cb->external_flags |= PCRE2_HASTHEN;
6039f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            break;
6040f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6041f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case OP_PRUNE:
6042f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case OP_PRUNE_ARG:
6043f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case OP_SKIP:
6044f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case OP_SKIP_ARG:
604553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            cb->had_pruneorskip = TRUE;
6046f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            break;
6047f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6048f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6049f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          break;  /* Found verb, exit loop */
6050f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6051f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        vn += verbs[i].len + 1;
6053f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
6054f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6055f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (i < verbcount) continue;    /* Successfully handled a verb */
6056f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *errorcodeptr = ERR60;          /* Verb not recognized */
6057f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      goto FAILED;
6058f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
6059f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
606053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Initialization for "real" parentheses */
60618366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes
60628366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes    newoptions = options;
606353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    skipunits = 0;
60648366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes    bravalue = OP_CBRA;
60658366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes    reset_bracount = FALSE;
60668366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes
6067f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Deal with the extended parentheses; all are introduced by '?', and the
6068f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    appearance of any of them means that this is not a capturing group. */
6069f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
60708366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes    if (*ptr == CHAR_QUESTION_MARK)
6071f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
607253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      int i, count;
607353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      int namelen;                /* Must be signed */
607453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      uint32_t index;
607553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      uint32_t set, unset, *optset;
607653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      named_group *ng;
607753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_SPTR name;
607853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_UCHAR *slot;
6079f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6080f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      switch (*(++ptr))
6081f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
6082f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* ------------------------------------------------------------ */
6083f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
6084f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        reset_bracount = TRUE;
6085f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Fall through */
6086f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6087f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* ------------------------------------------------------------ */
6088f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case CHAR_COLON:          /* Non-capturing bracket */
6089f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        bravalue = OP_BRA;
6090f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        ptr++;
6091f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        break;
6092f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6093f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* ------------------------------------------------------------ */
6094f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case CHAR_LEFT_PARENTHESIS:
6095f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        bravalue = OP_COND;       /* Conditional group */
6096f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        tempptr = ptr;
6097f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6098f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* A condition can be an assertion, a number (referring to a numbered
6099f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        group's having been set), a name (referring to a named group), or 'R',
6100f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        referring to recursion. R<digits> and R&name are also permitted for
6101f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        recursion tests.
6102f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6103f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        There are ways of testing a named group: (?(name)) is used by Python;
6104f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        Perl 5.10 onwards uses (?(<name>) or (?('name')).
6105f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6106f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        There is one unfortunate ambiguity, caused by history. 'R' can be the
6107f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        recursive thing or the name 'R' (and similarly for 'R' followed by
6108f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        digits). We look for a name first; if not found, we try the other case.
6109f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6110f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        For compatibility with auto-callouts, we allow a callout to be
6111f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        specified before a condition that is an assertion. First, check for the
6112f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        syntax of a callout; if found, adjust the temporary pointer that is
6113f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        used to check for an assertion condition. That's all that is needed! */
6114f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6115f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
6116f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
611753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (IS_DIGIT(ptr[3]) || ptr[3] == CHAR_RIGHT_PARENTHESIS)
611853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
611953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
612053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
612153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              tempptr += i + 1;
612253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
612353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          else
612453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
612553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            uint32_t delimiter = 0;
612653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
612753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              {
612853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              if (ptr[3] == PRIV(callout_start_delims)[i])
612953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                {
613053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                delimiter = PRIV(callout_end_delims)[i];
613153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                break;
613253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                }
613353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              }
613453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (delimiter != 0)
613553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              {
613653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              for (i = 4; ptr + i < cb->end_pattern; i++)
613753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                {
613853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                if (ptr[i] == delimiter)
613953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                  {
614053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                  if (ptr[i+1] == delimiter) i++;
614153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                  else
614253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                    {
614353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                    if (ptr[i+1] == CHAR_RIGHT_PARENTHESIS) tempptr += i + 2;
614453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                    break;
614553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                    }
614653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                  }
614753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                }
614853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              }
614953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
615053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
615153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          /* tempptr should now be pointing to the opening parenthesis of the
615253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          assertion condition. */
615353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
615453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (*tempptr != CHAR_LEFT_PARENTHESIS)
615553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
615653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *errorcodeptr = ERR28;
615753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            goto FAILED;
615853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
6159f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6160f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6161f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* For conditions that are assertions, check the syntax, and then exit
616253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        the switch. This will take control down to where bracketed groups
616353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        are processed. The assertion will be handled as part of the group,
616453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        but we need to identify this case because the conditional assertion may
616553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        not be quantified. */
6166f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6167f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (tempptr[1] == CHAR_QUESTION_MARK &&
6168f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              (tempptr[2] == CHAR_EQUALS_SIGN ||
6169f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich               tempptr[2] == CHAR_EXCLAMATION_MARK ||
61708366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes                 (tempptr[2] == CHAR_LESS_THAN_SIGN &&
61718366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes                   (tempptr[3] == CHAR_EQUALS_SIGN ||
61728366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes                    tempptr[3] == CHAR_EXCLAMATION_MARK))))
61738366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes          {
617453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->iscondassert = TRUE;
6175f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          break;
61768366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes          }
6177f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6178f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Other conditions use OP_CREF/OP_DNCREF/OP_RREF/OP_DNRREF, and all
6179f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        need to skip at least 1+IMM2_SIZE bytes at the start of the group. */
6180f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6181f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        code[1+LINK_SIZE] = OP_CREF;
618253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        skipunits = 1+IMM2_SIZE;
6183f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        refsign = -1;     /* => not a number */
6184f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        namelen = -1;     /* => not a name; must set to avoid warning */
6185f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        name = NULL;      /* Always set to avoid warning */
6186f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        recno = 0;        /* Always set to avoid warning */
6187f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
618853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* Point at character after (?( */
6189f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6190f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        ptr++;
619153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
619253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect
619353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        users of PCRE2 via an application can discover which release of PCRE2
619453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        is being used. */
619553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
619653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
619753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            ptr[7] != CHAR_RIGHT_PARENTHESIS)
619853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
619953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          BOOL ge = FALSE;
620053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          int major = 0;
620153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          int minor = 0;
620253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
620353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr += 7;
620453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (*ptr == CHAR_GREATER_THAN_SIGN)
620553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
620653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            ge = TRUE;
620753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            ptr++;
620853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
620953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
621053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          /* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT
621153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          references its argument twice. */
621253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
621353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr)))
621453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
621553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *errorcodeptr = ERR79;
621653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            goto FAILED;
621753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
621853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
621953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          while (IS_DIGIT(*ptr)) major = major * 10 + *ptr++ - '0';
622053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (*ptr == CHAR_DOT)
622153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
622253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            ptr++;
622353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0';
622453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (minor < 10) minor *= 10;
622553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
622653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
622753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (*ptr != CHAR_RIGHT_PARENTHESIS || minor > 99)
622853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
622953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *errorcodeptr = ERR79;
623053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            goto FAILED;
623153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
623253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
623353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (ge)
623453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) ||
623553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              (PCRE2_MAJOR == major && PCRE2_MINOR >= minor))?
623653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                OP_TRUE : OP_FALSE;
623753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          else
623853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)?
623953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              OP_TRUE : OP_FALSE;
624053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
624153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr++;
624253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          skipunits = 1;
624353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          break;  /* End of condition processing */
624453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
624553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
624653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* Check for a test for recursion in a named group. */
624753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
6248f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
6249f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6250f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          terminator = -1;
6251f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ptr += 2;
6252f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          code[1+LINK_SIZE] = OP_RREF;    /* Change the type of test */
6253f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6254f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6255f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Check for a test for a named group's having been set, using the Perl
6256f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        syntax (?(<name>) or (?('name'), and also allow for the original PCRE
6257f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        syntax of (?(name) or for (?(+n), (?(-n), and just (?(n). */
6258f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6259f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else if (*ptr == CHAR_LESS_THAN_SIGN)
6260f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6261f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          terminator = CHAR_GREATER_THAN_SIGN;
6262f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ptr++;
6263f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6264f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else if (*ptr == CHAR_APOSTROPHE)
6265f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6266f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          terminator = CHAR_APOSTROPHE;
6267f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ptr++;
6268f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6269f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else
6270f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6271f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          terminator = CHAR_NULL;
6272f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (*ptr == CHAR_MINUS || *ptr == CHAR_PLUS) refsign = *ptr++;
6273f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            else if (IS_DIGIT(*ptr)) refsign = 0;
6274f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6275f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6276f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Handle a number */
6277f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6278f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (refsign >= 0)
6279f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6280f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          while (IS_DIGIT(*ptr))
6281f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
62820ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes            if (recno > INT_MAX / 10 - 1)  /* Integer overflow */
62830ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              {
62840ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              while (IS_DIGIT(*ptr)) ptr++;
62850ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              *errorcodeptr = ERR61;
62860ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              goto FAILED;
62870ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              }
6288f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            recno = recno * 10 + (int)(*ptr - CHAR_0);
6289f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            ptr++;
6290f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6291f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6292f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6293f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Otherwise we expect to read a name; anything else is an error. When
629453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        the referenced name is one of a number of duplicates, a different
629553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        opcode is used and it needs more memory. Unfortunately we cannot tell
629653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        whether this is the case in the first pass, so we have to allow for
629753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        more memory always. In the second pass, the addition to skipunits
629853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        happens later. */
6299f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6300f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else
6301f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6302f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (IS_DIGIT(*ptr))
6303f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
630453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *errorcodeptr = ERR44;  /* Group name must start with non-digit */
6305f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            goto FAILED;
6306f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
630753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (!MAX_255(*ptr) || (cb->ctypes[*ptr] & ctype_word) == 0)
6308f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
6309f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *errorcodeptr = ERR28;   /* Assertion expected */
6310f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            goto FAILED;
6311f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
631253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          name = ptr;
631353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          /* Increment ptr, set namelen, check length */
631453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          READ_NAME(ctype_word, ERR48, *errorcodeptr);
631553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (lengthptr != NULL) skipunits += IMM2_SIZE;
6316f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6317f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6318f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Check the terminator */
6319f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
632053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if ((terminator > 0 && *ptr++ != (PCRE2_UCHAR)terminator) ||
6321f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *ptr++ != CHAR_RIGHT_PARENTHESIS)
6322f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6323f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ptr--;                  /* Error offset */
6324f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *errorcodeptr = ERR26;  /* Malformed number or name */
6325f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          goto FAILED;
6326f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6327f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6328f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Do no further checking in the pre-compile phase. */
6329f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6330f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (lengthptr != NULL) break;
6331f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6332f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* In the real compile we do the work of looking for the actual
6333f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        reference. If refsign is not negative, it means we have a number in
6334f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        recno. */
6335f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6336f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (refsign >= 0)
6337f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6338f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (recno <= 0)
6339f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
6340f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *errorcodeptr = ERR35;
6341f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            goto FAILED;
6342f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6343f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (refsign != 0) recno = (refsign == CHAR_MINUS)?
634453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            (cb->bracount + 1) - recno : recno + cb->bracount;
634553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (recno <= 0 || (uint32_t)recno > cb->final_bracount)
6346f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
6347f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *errorcodeptr = ERR15;
6348f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            goto FAILED;
6349f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6350f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          PUT2(code, 2+LINK_SIZE, recno);
635153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno;
6352f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          break;
6353f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6354f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6355f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Otherwise look for the name. */
6356f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
635753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        slot = cb->name_table;
635853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        for (i = 0; i < cb->names_found; i++)
6359f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
636053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) == 0) break;
636153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          slot += cb->name_entry_size;
6362f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6363f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6364f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Found the named subpattern. If the name is duplicated, add one to
6365f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        the opcode to change CREF/RREF into DNCREF/DNRREF and insert
6366f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        appropriate data values. Otherwise, just insert the unique subpattern
6367f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        number. */
6368f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
636953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (i < cb->names_found)
6370f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
637153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          int offset = i;            /* Offset of first name found */
637253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
637353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          count = 0;
637453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          for (;;)
6375f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
637653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            recno = GET2(slot, 0);   /* Number for last found */
637753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno;
6378f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            count++;
637953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (++i >= cb->names_found) break;
638053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            slot += cb->name_entry_size;
638153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) != 0 ||
638253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              (slot+IMM2_SIZE)[namelen] != 0) break;
6383f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6384f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6385f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (count > 1)
6386f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
6387f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            PUT2(code, 2+LINK_SIZE, offset);
6388f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            PUT2(code, 2+LINK_SIZE+IMM2_SIZE, count);
638953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            skipunits += IMM2_SIZE;
6390f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            code[1+LINK_SIZE]++;
6391f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6392f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          else  /* Not a duplicated name */
6393f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
6394f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            PUT2(code, 2+LINK_SIZE, recno);
6395f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6396f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6397f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6398f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* If terminator == CHAR_NULL it means that the name followed directly
6399f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        after the opening parenthesis [e.g. (?(abc)...] and in this case there
6400f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        are some further alternatives to try. For the cases where terminator !=
6401f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        CHAR_NULL [things like (?(<name>... or (?('name')... or (?(R&name)... ]
6402f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        we have now checked all the possibilities, so give an error. */
6403f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6404f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else if (terminator != CHAR_NULL)
6405f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6406f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *errorcodeptr = ERR15;
6407f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          goto FAILED;
6408f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6409f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6410f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Check for (?(R) for recursion. Allow digits after R to specify a
6411f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        specific group number. */
6412f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6413f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else if (*name == CHAR_R)
6414f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6415f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          recno = 0;
6416f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          for (i = 1; i < namelen; i++)
6417f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
6418f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (!IS_DIGIT(name[i]))
6419f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
642053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              *errorcodeptr = ERR15;        /* Non-existent subpattern */
6421f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              goto FAILED;
6422f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
64230ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes            if (recno > INT_MAX / 10 - 1)   /* Integer overflow */
64240ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              {
64250ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              *errorcodeptr = ERR61;
64260ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              goto FAILED;
64270ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              }
6428f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            recno = recno * 10 + name[i] - CHAR_0;
6429f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6430f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (recno == 0) recno = RREF_ANY;
6431f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          code[1+LINK_SIZE] = OP_RREF;      /* Change test type */
6432f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          PUT2(code, 2+LINK_SIZE, recno);
6433f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6434f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6435f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Similarly, check for the (?(DEFINE) "condition", which is always
643653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        false. During compilation we set OP_DEFINE to distinguish this from
643753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        other OP_FALSE conditions so that it can be checked for having only one
643853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        branch, but after that the opcode is changed to OP_FALSE. */
6439f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
644053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0)
6441f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
644253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          code[1+LINK_SIZE] = OP_DEFINE;
644353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          skipunits = 1;
6444f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6445f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6446f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Reference to an unidentified subpattern. */
6447f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6448f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else
6449f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6450f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *errorcodeptr = ERR15;
6451f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          goto FAILED;
6452f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6453f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        break;
6454f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6455f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6456f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* ------------------------------------------------------------ */
6457f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case CHAR_EQUALS_SIGN:                 /* Positive lookahead */
6458f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        bravalue = OP_ASSERT;
645953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cb->assert_depth += 1;
6460f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        ptr++;
6461f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        break;
6462f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6463f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Optimize (?!) to (*FAIL) unless it is quantified - which is a weird
6464f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        thing to do, but Perl allows all assertions to be quantified, and when
6465f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        they contain capturing parentheses there may be a potential use for
6466f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        this feature. Not that that applies to a quantified (?!) but we allow
6467f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        it for uniformity. */
6468f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6469f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* ------------------------------------------------------------ */
6470f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case CHAR_EXCLAMATION_MARK:            /* Negative lookahead */
6471f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        ptr++;
6472f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (*ptr == CHAR_RIGHT_PARENTHESIS && ptr[1] != CHAR_ASTERISK &&
6473f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich             ptr[1] != CHAR_PLUS && ptr[1] != CHAR_QUESTION_MARK &&
6474f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            (ptr[1] != CHAR_LEFT_CURLY_BRACKET || !is_counted_repeat(ptr+2)))
6475f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6476f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *code++ = OP_FAIL;
6477f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          previous = NULL;
6478f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          continue;
6479f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6480f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        bravalue = OP_ASSERT_NOT;
648153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cb->assert_depth += 1;
6482f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        break;
6483f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6484f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6485f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* ------------------------------------------------------------ */
6486f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case CHAR_LESS_THAN_SIGN:              /* Lookbehind or named define */
6487f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        switch (ptr[1])
6488f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6489f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          case CHAR_EQUALS_SIGN:               /* Positive lookbehind */
6490f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          bravalue = OP_ASSERTBACK;
649153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->assert_depth += 1;
6492f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ptr += 2;
6493f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          break;
6494f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6495f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */
6496f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          bravalue = OP_ASSERTBACK_NOT;
649753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->assert_depth += 1;
6498f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ptr += 2;
6499f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          break;
6500f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
650153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          /* Must be a name definition - as the syntax was checked in the
650253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          pre-pass, we can assume here that it is valid. Skip over the name
650353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          and go to handle the numbered group. */
650453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
650553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          default:
650653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          while (*(++ptr) != CHAR_GREATER_THAN_SIGN);
650753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr++;
650853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          goto NUMBERED_GROUP;
6509f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6510f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        break;
6511f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6512f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6513f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* ------------------------------------------------------------ */
6514f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case CHAR_GREATER_THAN_SIGN:           /* One-time brackets */
6515f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        bravalue = OP_ONCE;
6516f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        ptr++;
6517f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        break;
6518f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6519f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6520f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* ------------------------------------------------------------ */
652153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        case CHAR_C:                 /* Callout */
6522f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        previous_callout = code;     /* Save for later completion */
6523f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        after_manual_callout = 1;    /* Skip one item before completing */
652453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr++;                       /* Character after (?C */
6525f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
652653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* A callout may have a string argument, delimited by one of a fixed
652753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        number of characters, or an undelimited numerical argument, or no
652853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        argument, which is the same as (?C0). Different opcodes are used for
652953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        the two cases. */
6530f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
653153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (*ptr != CHAR_RIGHT_PARENTHESIS && !IS_DIGIT(*ptr))
6532f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
653353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          uint32_t delimiter = 0;
6534f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
653553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
6536f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
653753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (*ptr == PRIV(callout_start_delims)[i])
653853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              {
653953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              delimiter = PRIV(callout_end_delims)[i];
654053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              break;
654153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              }
6542f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6543f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
654453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (delimiter == 0)
6545f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
654653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *errorcodeptr = ERR82;
6547f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            goto FAILED;
6548f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6549f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
655053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          /* During the pre-compile phase, we parse the string and update the
655153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          length. There is no need to generate any code. (In fact, the string
655253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          has already been parsed in the pre-pass that looks for named
655353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          parentheses, but it does no harm to leave this code in.) */
655453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
655553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (lengthptr != NULL)     /* Only check the string */
6556f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
655753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            PCRE2_SPTR start = ptr;
655853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            do
6559f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
656053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              if (++ptr >= cb->end_pattern)
656153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                {
656253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                *errorcodeptr = ERR81;
656353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                ptr = start;   /* To give a more useful message */
656453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                goto FAILED;
656553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                }
656653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2;
6567f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
656853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            while (ptr[0] != delimiter);
656953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
657053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            /* Start points to the opening delimiter, ptr points to the
657153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            closing delimiter. We must allow for including the delimiter and
657253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            for the terminating zero. Any doubled delimiters within the string
657353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            make this an overestimate, but it is not worth bothering about. */
657453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
657553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            (*lengthptr) += (ptr - start) + 2 + (1 + 4*LINK_SIZE);
6576f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6577f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
657853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          /* In the real compile we can copy the string, knowing that it is
657953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          syntactically OK. The starting delimiter is included so that the
658053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          client can discover it if they want. We also pass the start offset to
658153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          help a script language give better error messages. */
6582f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
658353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          else
6584f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
658553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            PCRE2_UCHAR *callout_string = code + (1 + 4*LINK_SIZE);
658653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *callout_string++ = *ptr++;
658753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            PUT(code, 1 + 3*LINK_SIZE, (int)(ptr - cb->start_pattern)); /* Start offset */
658853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            for(;;)
6589f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
659053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              if (*ptr == delimiter)
6591f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                {
659253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                if (ptr[1] == delimiter) ptr++; else break;
6593f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                }
659453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              *callout_string++ = *ptr++;
6595f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
659653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *callout_string++ = CHAR_NULL;
659753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            code[0] = OP_CALLOUT_STR;
659853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            PUT(code, 1, (int)(ptr + 2 - cb->start_pattern)); /* Next offset */
659953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            PUT(code, 1 + LINK_SIZE, 0);      /* Default length */
660053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            PUT(code, 1 + 2*LINK_SIZE,        /* Compute size */
660153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                (int)(callout_string - code));
660253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            code = callout_string;
660353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
660453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
660553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          /* Advance to what should be the closing parenthesis, which is
660653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          checked below. */
660753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
660853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr++;
660953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
661053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
661153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* Handle a callout with an optional numerical argument, which must be
661253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        less than or equal to 255. A missing argument gives 0. */
661353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
661453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else
661553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
661653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          int n = 0;
661753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          code[0] = OP_CALLOUT;     /* Numerical callout */
661853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          while (IS_DIGIT(*ptr))
661953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
662053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            n = n * 10 + *ptr++ - CHAR_0;
662153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (n > 255)
6622f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
662353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              *errorcodeptr = ERR38;
6624f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              goto FAILED;
6625f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
6626f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
662753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          PUT(code, 1, (int)(ptr - cb->start_pattern + 1));  /* Next offset */
662853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          PUT(code, 1 + LINK_SIZE, 0);                    /* Default length */
662953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          code[1 + 2*LINK_SIZE] = n;                      /* Callout number */
663053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          code += PRIV(OP_lengths)[OP_CALLOUT];
663153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
6632f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
663353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* Both formats must have a closing parenthesis */
6634f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
663553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (*ptr != CHAR_RIGHT_PARENTHESIS)
663653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
663753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          *errorcodeptr = ERR39;
663853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          goto FAILED;
663953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
6640f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
664153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* Callouts cannot be quantified. */
6642f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
664353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        previous = NULL;
664453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        continue;
6645f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
664653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
664753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* ------------------------------------------------------------ */
664853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        case CHAR_P:              /* Python-style named subpattern handling */
664953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (*(++ptr) == CHAR_EQUALS_SIGN ||
665053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *ptr == CHAR_GREATER_THAN_SIGN)  /* Reference or recursion */
665153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
665253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
665353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          terminator = CHAR_RIGHT_PARENTHESIS;
665453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          goto NAMED_REF_OR_RECURSE;
665553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
665653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else if (*ptr != CHAR_LESS_THAN_SIGN)  /* Test for Python-style defn */
665753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
665853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          *errorcodeptr = ERR41;
665953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          goto FAILED;
6660f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
666153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* Fall through to handle (?P< as (?< is handled */
666253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
666353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
666453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* ------------------------------------------------------------ */
666553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        case CHAR_APOSTROPHE:   /* Define a name - note fall through above */
666653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
666753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* The syntax was checked and the list of names was set up in the
666853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        pre-pass, so there is nothing to be done now except to skip over the
666953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        name. */
6670f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
667153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
667253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                  CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
667353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        while (*(++ptr) != (unsigned int)terminator);
667453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ptr++;
667553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto NUMBERED_GROUP;      /* Set up numbered group */
6676f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6677f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6678f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* ------------------------------------------------------------ */
6679f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case CHAR_AMPERSAND:            /* Perl recursion/subroutine syntax */
6680f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        terminator = CHAR_RIGHT_PARENTHESIS;
6681f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        is_recurse = TRUE;
6682f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Fall through */
6683f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6684f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* We come here from the Python syntax above that handles both
6685f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        references (?P=name) and recursion (?P>name), as well as falling
6686f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        through from the Perl recursion syntax (?&name). We also come here from
6687f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        the Perl \k<name> or \k'name' back reference syntax and the \k{name}
6688f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        .NET syntax, and the Oniguruma \g<...> and \g'...' subroutine syntax. */
6689f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6690f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        NAMED_REF_OR_RECURSE:
6691f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        name = ++ptr;
6692f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (IS_DIGIT(*ptr))
6693f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
669453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          *errorcodeptr = ERR44;   /* Group name must start with non-digit */
6695f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          goto FAILED;
6696f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
669753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* Increment ptr, set namelen, check length */
669853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        READ_NAME(ctype_word, ERR48, *errorcodeptr);
6699f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
670053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* In the pre-compile phase, do a syntax check. */
6701f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6702f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (lengthptr != NULL)
6703f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6704f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (namelen == 0)
6705f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
6706f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *errorcodeptr = ERR62;
6707f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            goto FAILED;
6708f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
670953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (*ptr != (PCRE2_UCHAR)terminator)
6710f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
6711f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *errorcodeptr = ERR42;
6712f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            goto FAILED;
6713f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
671453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
6715f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
671653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* Scan the list of names generated in the pre-pass in order to get
671753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        a number and whether or not this name is duplicated. */
67180ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
671953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        recno = 0;
672053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        is_dupname = FALSE;
672153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        ng = cb->named_groups;
67220ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
672353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        for (i = 0; i < cb->names_found; i++, ng++)
672453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
672553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (namelen == ng->length &&
672653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              PRIV(strncmp)(name, ng->name, namelen) == 0)
672753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
672853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            open_capitem *oc;
672953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            is_dupname = ng->isdup;
673053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            recno = ng->number;
67310ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
673253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            /* For a recursion, that's all that is needed. We can now go to the
673353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            code that handles numerical recursion. */
67340ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
673553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (is_recurse) goto HANDLE_RECURSION;
67360ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
673753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            /* For a back reference, update the back reference map and the
673853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            maximum back reference. Then for each group we must check to see if
673953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            it is recursive, that is, it is inside the group that it
674053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            references. A flag is set so that the group can be made atomic. */
67410ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
674253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            cb->backref_map |= (recno < 32)? (1u << recno) : 1;
674353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno;
67440ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
674553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            for (oc = cb->open_caps; oc != NULL; oc = oc->next)
67460ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              {
674753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              if (oc->number == recno)
67480ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes                {
674953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                oc->flag = TRUE;
675053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                break;
67510ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes                }
67520ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              }
67530ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes            }
6754f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6755f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
675653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* If the name was not found we have a bad reference. */
6757f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
675853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (recno == 0)
6759f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
676053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          *errorcodeptr = ERR15;
676153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          goto FAILED;
6762f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6763f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
676453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* If a back reference name is not duplicated, we can handle it as a
676553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        numerical reference. */
6766f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
676753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (!is_dupname) goto HANDLE_REFERENCE;
6768f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
676953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* If a back reference name is duplicated, we generate a different
677053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        opcode to a numerical back reference. In the second pass we must search
677153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        for the index and count in the final name table. */
6772f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
677353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        count = 0;
677453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        index = 0;
6775f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
677653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (lengthptr == NULL)
677753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
677853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          slot = cb->name_table;
677953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          for (i = 0; i < cb->names_found; i++)
6780f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
678153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) == 0 &&
678253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                slot[IMM2_SIZE+namelen] == 0)
6783f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
678453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              if (count == 0) index = i;
678553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              count++;
6786f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
678753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            slot += cb->name_entry_size;
678853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
6789f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
679053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (count == 0)
679153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
679253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *errorcodeptr = ERR15;
679353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            goto FAILED;
6794f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6795f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6796f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
679753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
679853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        previous = code;
679953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        *code++ = ((options & PCRE2_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
680053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        PUT2INC(code, 0, index);
680153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        PUT2INC(code, 0, count);
680253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        continue;  /* End of back ref handling */
6803f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6804f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6805f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* ------------------------------------------------------------ */
68060ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes        case CHAR_R:              /* Recursion, same as (?0) */
68070ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes        recno = 0;
68080ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes        if (*(++ptr) != CHAR_RIGHT_PARENTHESIS)
68090ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes          {
68100ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes          *errorcodeptr = ERR29;
68110ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes          goto FAILED;
68120ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes          }
68130ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes        goto HANDLE_RECURSION;
6814f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6815f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6816f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* ------------------------------------------------------------ */
6817f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case CHAR_MINUS: case CHAR_PLUS:  /* Recursion or subroutine */
6818f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
6819f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
6820f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6821f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          terminator = CHAR_RIGHT_PARENTHESIS;
6822f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6823f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          /* Come here from the \g<...> and \g'...' code (Oniguruma
6824f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          compatibility). However, the syntax has been checked to ensure that
6825f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          the ... are a (signed) number, so that neither ERR63 nor ERR29 will
6826f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          be called on this path, nor will the jump to OTHER_CHAR_AFTER_QUERY
6827f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ever be taken. */
6828f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6829f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          HANDLE_NUMERICAL_RECURSION:
6830f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6831f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if ((refsign = *ptr) == CHAR_PLUS)
6832f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
6833f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            ptr++;
6834f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (!IS_DIGIT(*ptr))
6835f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
6836f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *errorcodeptr = ERR63;
6837f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              goto FAILED;
6838f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
6839f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6840f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          else if (refsign == CHAR_MINUS)
6841f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
6842f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (!IS_DIGIT(ptr[1]))
6843f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              goto OTHER_CHAR_AFTER_QUERY;
6844f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            ptr++;
6845f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6846f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6847f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          recno = 0;
684853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          while (IS_DIGIT(*ptr))
68490ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes            {
68500ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes            if (recno > INT_MAX / 10 - 1) /* Integer overflow */
68510ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              {
68520ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              while (IS_DIGIT(*ptr)) ptr++;
68530ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              *errorcodeptr = ERR61;
68540ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              goto FAILED;
68550ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes              }
6856f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            recno = recno * 10 + *ptr++ - CHAR_0;
68570ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes            }
6858f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
685953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (*ptr != (PCRE2_UCHAR)terminator)
6860f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
6861f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            *errorcodeptr = ERR29;
6862f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            goto FAILED;
6863f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6864f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6865f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (refsign == CHAR_MINUS)
6866f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
6867f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (recno == 0)
6868f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
6869f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *errorcodeptr = ERR58;
6870f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              goto FAILED;
6871f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
687253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            recno = (int)(cb->bracount + 1) - recno;
6873f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            if (recno <= 0)
6874f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
6875f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              *errorcodeptr = ERR15;
6876f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              goto FAILED;
6877f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
6878f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6879f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          else if (refsign == CHAR_PLUS)
6880f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
688153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            if (recno == 0)
6882f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              {
688353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis              *errorcodeptr = ERR58;
6884f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              goto FAILED;
6885f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich              }
688653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            recno += cb->bracount;
688753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
688853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
688953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if ((uint32_t)recno > cb->final_bracount)
689053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
689153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *errorcodeptr = ERR15;
689253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            goto FAILED;
6893f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6894f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
689553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          /* Come here from code above that handles a named recursion.
689653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          We insert the number of the called group after OP_RECURSE. At the
689753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          end of compiling the pattern is scanned and these numbers are
689853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          replaced by offsets within the pattern. It is done like this to avoid
689953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          problems with forward references and adjusting offsets when groups
690053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          are duplicated and moved (as discovered in previous implementations).
690153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          Note that a recursion does not have a set first character (relevant
690253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if it is repeated, because it will then be wrapped with ONCE
690353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          brackets). */
6904f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
690553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          HANDLE_RECURSION:
690653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          previous = code;
6907f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *code = OP_RECURSE;
690853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          PUT(code, 1, recno);
6909f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          code += 1 + LINK_SIZE;
691053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          groupsetfirstcu = FALSE;
691153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->had_recurse = TRUE;
6912f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6913f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6914f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Can't determine a first byte now */
6915f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
691653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
6917f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        continue;
6918f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6919f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6920f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* ------------------------------------------------------------ */
6921f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        default:              /* Other characters: check option setting */
6922f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        OTHER_CHAR_AFTER_QUERY:
6923f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        set = unset = 0;
6924f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        optset = &set;
6925f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6926f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
6927f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6928f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          switch (*ptr++)
6929f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
6930f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case CHAR_MINUS: optset = &unset; break;
6931f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6932f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            case CHAR_J:    /* Record that it changed in the external options */
693353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            *optset |= PCRE2_DUPNAMES;
693453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            cb->external_flags |= PCRE2_JCHANGED;
6935f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            break;
6936f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
693753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            case CHAR_i: *optset |= PCRE2_CASELESS; break;
693853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            case CHAR_m: *optset |= PCRE2_MULTILINE; break;
693953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            case CHAR_s: *optset |= PCRE2_DOTALL; break;
694053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            case CHAR_x: *optset |= PCRE2_EXTENDED; break;
694153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            case CHAR_U: *optset |= PCRE2_UNGREEDY; break;
6942f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
694353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            default:  *errorcodeptr = ERR11;
6944f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                      ptr--;    /* Correct the offset */
6945f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                      goto FAILED;
6946f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
6947f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6948f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6949f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Set up the changed option bits, but don't change anything yet. */
6950f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6951f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        newoptions = (options | set) & (~unset);
6952f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6953f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* If the options ended with ')' this is not the start of a nested
695453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        group with option changes, so the options change at this level. They
695553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        must also be passed back for use in subsequent branches. Reset the
695653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        greedy defaults and the case value for firstcu and reqcu. */
6957f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6958f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (*ptr == CHAR_RIGHT_PARENTHESIS)
6959f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
6960f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *optionsptr = options = newoptions;
696153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          greedy_default = ((newoptions & PCRE2_UNGREEDY) != 0);
696253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          greedy_non_default = greedy_default ^ 1;
696353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          req_caseopt = ((newoptions & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
6964f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          previous = NULL;       /* This item can't be repeated */
6965f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          continue;              /* It is complete */
6966f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
6967f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6968f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* If the options ended with ':' we are heading into a nested group
6969f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        with possible change of options. Such groups are non-capturing and are
6970f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        not assertions of any kind. All we need to do is skip over the ':';
6971f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        the newoptions value is handled below. */
6972f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6973f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        bravalue = OP_BRA;
6974f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        ptr++;
6975f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }     /* End of switch for character following (? */
6976f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }       /* End of (? handling */
6977f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
697853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Opening parenthesis not followed by '*' or '?'. If PCRE2_NO_AUTO_CAPTURE
6979f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    is set, all unadorned brackets become non-capturing and behave like (?:...)
6980f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    brackets. */
6981f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
698253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else if ((options & PCRE2_NO_AUTO_CAPTURE) != 0)
6983f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
6984f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      bravalue = OP_BRA;
6985f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
6986f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6987f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Else we have a capturing group. */
6988f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6989f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    else
6990f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
6991f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      NUMBERED_GROUP:
699253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cb->bracount += 1;
699353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PUT2(code, 1+LINK_SIZE, cb->bracount);
699453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      skipunits = IMM2_SIZE;
6995f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
6996f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
6997f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Process nested bracketed regex. First check for parentheses nested too
6998f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    deeply. */
6999f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
700053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if ((cb->parens_depth += 1) > (int)(cb->cx->parens_nest_limit))
7001f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
700253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *errorcodeptr = ERR19;
7003f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      goto FAILED;
7004f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7005f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
70068366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes    /* All assertions used not to be repeatable, but this was changed for Perl
70078366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes    compatibility. All kinds can now be repeated except for assertions that are
70088366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes    conditions (Perl also forbids these to be repeated). We copy code into a
7009f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    non-register variable (tempcode) in order to be able to pass its address
70108366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes    because some compilers complain otherwise. At the start of a conditional
701153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    group whose condition is an assertion, cb->iscondassert is set. We unset it
70128366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes    here so as to allow assertions later in the group to be quantified. */
70138366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes
70148366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes    if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT &&
701553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cb->iscondassert)
70168366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes      {
70178366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes      previous = NULL;
701853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cb->iscondassert = FALSE;
70198366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes      }
70200ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes    else
70210ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes      {
70220ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes      previous = code;
70230ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes      }
7024f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7025f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    *code = bravalue;
7026f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    tempcode = code;
702753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    tempreqvary = cb->req_varyopt;        /* Save value before bracket */
702853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    tempbracount = cb->bracount;          /* Save value before bracket */
7029f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    length_prevgroup = 0;                 /* Initialize for pre-compile phase */
7030f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7031f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (!compile_regex(
7032f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich         newoptions,                      /* The complete new option state */
7033f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich         &tempcode,                       /* Where to put code (updated) */
7034f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich         &ptr,                            /* Input pointer (updated) */
7035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich         errorcodeptr,                    /* Where to put an error message */
7036f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich         (bravalue == OP_ASSERTBACK ||
7037f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
7038f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich         reset_bracount,                  /* True if (?| group */
703953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis         skipunits,                       /* Skip over bracket number */
7040f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich         cond_depth +
7041f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich           ((bravalue == OP_COND)?1:0),   /* Depth of condition subpatterns */
704253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis         &subfirstcu,                     /* For possible first char */
704353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis         &subfirstcuflags,
704453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis         &subreqcu,                       /* For possible last char */
704553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis         &subreqcuflags,
7046f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich         bcptr,                           /* Current branch chain */
704753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis         cb,                              /* Compile data block */
7048f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich         (lengthptr == NULL)? NULL :      /* Actual compile phase */
7049f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich           &length_prevgroup              /* Pre-compile phase */
7050f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich         ))
7051f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      goto FAILED;
7052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
705353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    cb->parens_depth -= 1;
7054f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7055f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* If this was an atomic group and there are no capturing groups within it,
7056f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    generate OP_ONCE_NC instead of OP_ONCE. */
7057f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
705853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (bravalue == OP_ONCE && cb->bracount <= tempbracount)
7059f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *code = OP_ONCE_NC;
7060f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7061f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
706253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cb->assert_depth -= 1;
7063f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7064f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* At the end of compiling, code is still pointing to the start of the
7065f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    group, while tempcode has been updated to point past the end of the group.
7066f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    The pattern pointer (ptr) is on the bracket.
7067f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7068f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    If this is a conditional bracket, check that there are no more than
7069f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    two branches in the group, or just one if it's a DEFINE group. We do this
7070f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    in the real compile phase, not in the pre-pass, where the whole group may
7071f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    not be available. */
7072f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7073f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (bravalue == OP_COND && lengthptr == NULL)
7074f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
707553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_UCHAR *tc = code;
7076f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      int condcount = 0;
7077f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7078f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      do {
7079f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich         condcount++;
7080f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich         tc += GET(tc,1);
7081f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich         }
7082f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      while (*tc != OP_KET);
7083f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7084f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* A DEFINE group is never obeyed inline (the "condition" is always
708553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      false). It must have only one branch. Having checked this, change the
708653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      opcode to OP_FALSE. */
7087f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
708853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (code[LINK_SIZE+1] == OP_DEFINE)
7089f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
7090f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (condcount > 1)
7091f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
7092f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *errorcodeptr = ERR54;
7093f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          goto FAILED;
7094f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
709553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        code[LINK_SIZE+1] = OP_FALSE;
709653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        bravalue = OP_DEFINE;   /* Just a flag to suppress char handling below */
7097f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7098f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7099f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* A "normal" conditional group. If there is just one branch, we must not
710053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      make use of its firstcu or reqcu, because this is equivalent to an
7101f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      empty second branch. */
7102f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7103f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else
7104f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
7105f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (condcount > 2)
7106f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
7107f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *errorcodeptr = ERR27;
7108f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          goto FAILED;
7109f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
711053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (condcount == 1) subfirstcuflags = subreqcuflags = REQ_NONE;
7111f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7112f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7113f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
71148b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    /* At the end of a group, it's an error if we hit end of pattern or
71158b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    any non-closing parenthesis. This check also happens in the pre-scan,
71168b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis    so should not trigger here, but leave this code as an insurance. */
7117f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7118f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (*ptr != CHAR_RIGHT_PARENTHESIS)
7119f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
7120f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *errorcodeptr = ERR14;
7121f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      goto FAILED;
7122f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7123f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7124f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* In the pre-compile phase, update the length by the length of the group,
7125f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    less the brackets at either end. Then reduce the compiled code to just a
7126f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    set of non-capturing brackets so that it doesn't use much memory if it is
7127f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    duplicated by a quantifier. */
7128f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7129f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (lengthptr != NULL)
7130f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
7131f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (OFLOW_MAX - *lengthptr < length_prevgroup - 2 - 2*LINK_SIZE)
7132f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
7133f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *errorcodeptr = ERR20;
7134f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        goto FAILED;
7135f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7136f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *lengthptr += length_prevgroup - 2 - 2*LINK_SIZE;
7137f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      code++;   /* This already contains bravalue */
7138f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      PUTINC(code, 0, 1 + LINK_SIZE);
7139f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *code++ = OP_KET;
7140f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      PUTINC(code, 0, 1 + LINK_SIZE);
7141f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      break;    /* No need to waste time with special character handling */
7142f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7143f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7144f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Otherwise update the main code pointer to the end of the group. */
7145f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7146f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    code = tempcode;
7147f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7148f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* For a DEFINE group, required and first character settings are not
7149f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    relevant. */
7150f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
715153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (bravalue == OP_DEFINE) break;
7152f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7153f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Handle updating of the required and first characters for other types of
7154f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    group. Update for normal brackets of all kinds, and conditions with two
7155f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    branches (see code above). If the bracket is followed by a quantifier with
715653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zero repeat, we have to back off. Hence the definition of zeroreqcu and
715753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zerofirstcu outside the main loop so that they can be accessed for the
7158f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    back off. */
7159f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
716053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zeroreqcu = reqcu;
716153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zeroreqcuflags = reqcuflags;
716253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zerofirstcu = firstcu;
716353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    zerofirstcuflags = firstcuflags;
716453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    groupsetfirstcu = FALSE;
7165f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7166f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (bravalue >= OP_ONCE)
7167f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
716853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* If we have not yet set a firstcu in this branch, take it from the
7169f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      subpattern, remembering that it was set here so that a repeat of more
717053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      than one can replicate it as reqcu if necessary. If the subpattern has
717153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      no firstcu, set "none" for the whole branch. In both cases, a zero
717253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      repeat forces firstcu to "none". */
7173f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
717453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
7175f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
717653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (subfirstcuflags >= 0)
7177f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
717853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          firstcu = subfirstcu;
717953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          firstcuflags = subfirstcuflags;
718053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          groupsetfirstcu = TRUE;
7181f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
718253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        else firstcuflags = REQ_NONE;
718353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        zerofirstcuflags = REQ_NONE;
7184f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7185f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
718653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* If firstcu was previously set, convert the subpattern's firstcu
718753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      into reqcu if there wasn't one, using the vary flag that was in
7188f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      existence beforehand. */
7189f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
719053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else if (subfirstcuflags >= 0 && subreqcuflags < 0)
7191f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
719253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        subreqcu = subfirstcu;
719353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        subreqcuflags = subfirstcuflags | tempreqvary;
7194f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7195f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7196f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* If the subpattern set a required byte (or set a first byte that isn't
7197f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      really the first byte - see above), set it. */
7198f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
719953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (subreqcuflags >= 0)
7200f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
720153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        reqcu = subreqcu;
720253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        reqcuflags = subreqcuflags;
7203f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7204f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7205f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
720653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* For a forward assertion, we take the reqcu, if set. This can be
7207f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    helpful if the pattern that follows the assertion doesn't set a different
720853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    char. For example, it's useful for /(?=abcde).+/. We can't set firstcu
7209f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    for an assertion, however because it leads to incorrect effect for patterns
721053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    such as /(?=a)a.+/ when the "real" "a" would then become a reqcu instead
721153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    of a firstcu. This is overcome by a scan at the end if there's no
721253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    firstcu, looking for an asserted first char. */
7213f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
721453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else if (bravalue == OP_ASSERT && subreqcuflags >= 0)
7215f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
721653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      reqcu = subreqcu;
721753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      reqcuflags = subreqcuflags;
7218f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7219f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;     /* End of processing '(' */
7220f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7221f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7222f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* ===================================================================*/
7223f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Handle metasequences introduced by \. For ones like \d, the ESC_ values
7224f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    are arranged to be the negation of the corresponding OP_values in the
722553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    default case when PCRE2_UCP is not set. For the back references, the values
7226f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    are negative the reference number. Only back references and those types
7227f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    that consume a character may be repeated. We can test for values between
7228f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    ESC_b and ESC_Z for the latter; this may have to change if any new ones are
722953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    ever created.
723053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
723153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    Note: \Q and \E are handled at the start of the character-processing loop,
723253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    not here. */
7233f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7234f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    case CHAR_BACKSLASH:
7235f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    tempptr = ptr;
723653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    escape = PRIV(check_escape)(&ptr, cb->end_pattern, &ec, errorcodeptr,
723753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      options, FALSE, cb);
7238f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (*errorcodeptr != 0) goto FAILED;
7239f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7240f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (escape == 0)                  /* The escape coded a single character */
7241f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      c = ec;
7242f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    else
7243f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
7244f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* For metasequences that actually match a character, we disable the
7245f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      setting of a first character if it hasn't already been set. */
7246f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
724753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (firstcuflags == REQ_UNSET && escape > ESC_b && escape < ESC_Z)
724853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        firstcuflags = REQ_NONE;
7249f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7250f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* Set values to reset to if this is followed by a zero repeat. */
7251f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
725253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      zerofirstcu = firstcu;
725353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      zerofirstcuflags = firstcuflags;
725453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      zeroreqcu = reqcu;
725553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      zeroreqcuflags = reqcuflags;
7256f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7257f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n'
7258f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      is a subroutine call by number (Oniguruma syntax). In fact, the value
7259f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      ESC_g is returned only for these cases. So we don't need to check for <
7260f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      or ' if the value is ESC_g. For the Perl syntax \g{n} the value is
7261f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      -n, and for the Perl syntax \g{name} the result is ESC_k (as
7262f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      that is a synonym for a named back reference). */
7263f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7264f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (escape == ESC_g)
7265f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
726653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        PCRE2_SPTR p;
726753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        uint32_t cf;
7268f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7269f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
7270f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
7271f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7272f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* These two statements stop the compiler from warning about possibly
7273f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
7274f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        fact, because we do the check for a number below, the paths that
7275f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        would actually be in error are never taken. */
7276f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
727753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        skipunits = 0;
7278f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        reset_bracount = FALSE;
7279f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7280f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* If it's not a signed or unsigned number, treat it as a name. */
7281f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7282f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        cf = ptr[1];
7283f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (cf != CHAR_PLUS && cf != CHAR_MINUS && !IS_DIGIT(cf))
7284f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
7285f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          is_recurse = TRUE;
7286f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          goto NAMED_REF_OR_RECURSE;
7287f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
7288f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7289f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Signed or unsigned number (cf = ptr[1]) is known to be plus or minus
7290f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        or a digit. */
7291f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7292f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        p = ptr + 2;
7293f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        while (IS_DIGIT(*p)) p++;
729453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (*p != (PCRE2_UCHAR)terminator)
7295f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
7296f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *errorcodeptr = ERR57;
72970ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes          goto FAILED;
7298f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
7299f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        ptr++;
7300f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        goto HANDLE_NUMERICAL_RECURSION;
7301f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7302f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7303f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* \k<name> or \k'name' is a back reference by name (Perl syntax).
7304f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      We also support \k{name} (.NET syntax).  */
7305f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7306f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (escape == ESC_k)
7307f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
7308f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if ((ptr[1] != CHAR_LESS_THAN_SIGN &&
7309f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))
7310f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
7311f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *errorcodeptr = ERR69;
73120ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes          goto FAILED;
7313f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
7314f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        is_recurse = FALSE;
7315f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
7316f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
7317f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
7318f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        goto NAMED_REF_OR_RECURSE;
7319f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7320f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
732153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Back references are handled specially; must disable firstcu if
7322f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      not set to cope with cases like (?=(\w+))\1: which would otherwise set
7323f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      ':' later. */
7324f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7325f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (escape < 0)
7326f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
7327f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        open_capitem *oc;
7328f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        recno = -escape;
7329f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7330f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Come here from named backref handling when the reference is to a
733153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        single group (i.e. not to a duplicated name). */
7332f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7333f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        HANDLE_REFERENCE:
733453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (recno > (int)cb->final_bracount)
733553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
733653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          *errorcodeptr = ERR15;
733753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          goto FAILED;
733853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
733953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE;
7340f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        previous = code;
734153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        *code++ = ((options & PCRE2_CASELESS) != 0)? OP_REFI : OP_REF;
7342f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        PUT2INC(code, 0, recno);
734353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cb->backref_map |= (recno < 32)? (1u << recno) : 1;
734453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if ((uint32_t)recno > cb->top_backref) cb->top_backref = recno;
7345f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7346f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        /* Check to see if this back reference is recursive, that is, it
7347f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        is inside the group that it references. A flag is set so that the
7348f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        group can be made atomic. */
7349f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
735053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        for (oc = cb->open_caps; oc != NULL; oc = oc->next)
7351f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
7352f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          if (oc->number == recno)
7353f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            {
7354f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            oc->flag = TRUE;
7355f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            break;
7356f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            }
7357f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
7358f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7359f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7360f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* So are Unicode property matches, if supported. */
7361f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
736253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
7363f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else if (escape == ESC_P || escape == ESC_p)
7364f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
7365f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        BOOL negated;
7366f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        unsigned int ptype = 0, pdata = 0;
736753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr, cb))
7368f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          goto FAILED;
7369f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        previous = code;
7370f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
7371f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *code++ = ptype;
7372f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *code++ = pdata;
7373f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7374f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#else
7375f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7376f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* If Unicode properties are not supported, \X, \P, and \p are not
7377f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      allowed. */
7378f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7379f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else if (escape == ESC_X || escape == ESC_P || escape == ESC_p)
7380f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
7381f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *errorcodeptr = ERR45;
7382f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        goto FAILED;
7383f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7384f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
7385f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
738653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* The use of \C can be locked out. */
738753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
738853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef NEVER_BACKSLASH_C
738953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else if (escape == ESC_C)
739053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
739153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        *errorcodeptr = ERR85;
739253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto FAILED;
739353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
739453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#else
739553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else if (escape == ESC_C && (options & PCRE2_NEVER_BACKSLASH_C) != 0)
739653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
739753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        *errorcodeptr = ERR83;
739853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        goto FAILED;
739953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
740053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
740153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
7402f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      /* For the rest (including \X when Unicode properties are supported), we
7403f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      can obtain the OP value by negating the escape value in the default
740453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      situation when PCRE2_UCP is not set. When it *is* set, we substitute
7405f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      Unicode property tests. Note that \b and \B do a one-character
7406f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      lookbehind, and \A also behaves as if it does. */
7407f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7408f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else
7409f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
741053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */
7411f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
741253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis             cb->max_lookbehind == 0)
741353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->max_lookbehind = 1;
741453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
7415f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (escape >= ESC_DU && escape <= ESC_wu)
7416f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
741753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->nestptr[1] = cb->nestptr[0];         /* Back up if at 2nd level */
741853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->nestptr[0] = ptr + 1;                /* Where to resume */
7419f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          ptr = substitutes[escape - ESC_DU] - 1;  /* Just before substitute */
7420f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
7421f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        else
7422f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
74238b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis        /* In non-UTF mode, and for both 32-bit modes, we turn \C into
74248b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis        OP_ALLANY instead of OP_ANYBYTE so that it works in DFA mode and in
74258b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis        lookbehinds. */
7426f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7427f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
7428f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;
74298b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis#if PCRE2_CODE_UNIT_WIDTH == 32
74308b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis          *code++ = (escape == ESC_C)? OP_ALLANY : escape;
74318b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis#else
7432f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
74338b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis#endif
7434f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
7435f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7436f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      continue;
7437f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7438f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7439f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* We have a data character whose value is in c. In UTF-8 mode it may have
7440f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    a value > 127. We set its representation in the length/buffer, and then
7441f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    handle it as a data character. */
7442f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
744353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    mclength = PUTCHAR(c, mcbuffer);
7444f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    goto ONE_CHAR;
7445f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7446f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7447f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* ===================================================================*/
7448f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Handle a literal character. It is guaranteed not to be whitespace or #
7449f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    when the extended flag is set. If we are in a UTF mode, it may be a
7450f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    multi-unit literal character. */
7451f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7452f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    default:
7453f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    NORMAL_CHAR:
7454f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    mclength = 1;
7455f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    mcbuffer[0] = c;
7456f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
745753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
7458f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (utf && HAS_EXTRALEN(c))
7459f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
7460f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
7461f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7462f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* At this point we have the character's bytes in mcbuffer, and the length
746353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    in mclength. When not in UTF mode, the length is always 1. */
7464f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7465f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    ONE_CHAR:
7466f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    previous = code;
7467f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
746853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* For caseless UTF mode, check whether this character has more than one
746953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    other case. If so, generate a special OP_PROP item instead of OP_CHARI. */
7470f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
747153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef SUPPORT_UNICODE
747253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (utf && (options & PCRE2_CASELESS) != 0)
7473f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
7474f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      GETCHAR(c, mcbuffer);
7475f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if ((c = UCD_CASESET(c)) != 0)
7476f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
7477f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *code++ = OP_PROP;
7478f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *code++ = PT_CLIST;
7479f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *code++ = c;
748053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (firstcuflags == REQ_UNSET)
748153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          firstcuflags = zerofirstcuflags = REQ_NONE;
7482f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        break;
7483f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7484f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7485f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
7486f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7487f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Caseful matches, or not one of the multicase characters. */
7488f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
748953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *code++ = ((options & PCRE2_CASELESS) != 0)? OP_CHARI : OP_CHAR;
7490f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];
7491f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7492f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Remember if \r or \n were seen */
7493f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7494f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL)
749553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cb->external_flags |= PCRE2_HASCRORLF;
7496f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7497f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Set the first and required bytes appropriately. If no previous first
7498f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    byte, set it from this character, but revert to none on a zero repeat.
749953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    Otherwise, leave the firstcu value alone, and don't change it on a zero
7500f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    repeat. */
7501f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
750253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (firstcuflags == REQ_UNSET)
7503f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
750453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      zerofirstcuflags = REQ_NONE;
750553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      zeroreqcu = reqcu;
750653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      zeroreqcuflags = reqcuflags;
7507f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
750853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* If the character is more than one byte long, we can set firstcu
7509f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      only if it is not to be matched caselessly. */
7510f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7511f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (mclength == 1 || req_caseopt == 0)
7512f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
751353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        firstcu = mcbuffer[0] | req_caseopt;
751453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        firstcu = mcbuffer[0];
751553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        firstcuflags = req_caseopt;
7516f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7517f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        if (mclength != 1)
7518f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
751953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          reqcu = code[-1];
752053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          reqcuflags = cb->req_varyopt;
7521f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
7522f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
752353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else firstcuflags = reqcuflags = REQ_NONE;
7524f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7525f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
752653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* firstcu was previously set; we can set reqcu only if the length is
7527f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    1 or the matching is caseful. */
7528f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7529f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    else
7530f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
753153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      zerofirstcu = firstcu;
753253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      zerofirstcuflags = firstcuflags;
753353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      zeroreqcu = reqcu;
753453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      zeroreqcuflags = reqcuflags;
7535f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (mclength == 1 || req_caseopt == 0)
7536f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
753753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        reqcu = code[-1];
753853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        reqcuflags = req_caseopt | cb->req_varyopt;
7539f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7540f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7541f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7542f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;            /* End of literal character handling */
7543f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
7544f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }                   /* end of big loop */
7545f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7546f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Control never reaches here by falling through, only by a goto for all the
7547f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevicherror states. Pass back the position in the pattern so that it can be displayed
7548f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichto the user for diagnosing the error. */
7549f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7550f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichFAILED:
7551f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*ptrptr = ptr;
7552f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichreturn FALSE;
7553f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
7554f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7555f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7556f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7557f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
755853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*   Compile regex: a sequence of alternatives    *
7559f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
7560f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7561f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* On entry, ptr is pointing past the bracket character, but on return it
7562f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichpoints to the closing bracket, or vertical bar, or end of string. The code
7563f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichvariable is pointing at the code unit into which the BRA operator has been stored.
7564f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichThis function is used during the pre-compile phase when we are trying to find
7565f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichout the amount of memory needed, as well as during the real compile phase. The
7566f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichvalue of lengthptr distinguishes the two phases.
7567f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7568f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments:
7569f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  options           option bits, including any changes for this subpattern
7570f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  codeptr           -> the address of the current code pointer
7571f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  ptrptr            -> the address of the current pattern pointer
7572f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  errorcodeptr      -> pointer to error code variable
7573f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  lookbehind        TRUE if this is a lookbehind assertion
7574f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  reset_bracount    TRUE to reset the count for each branch
757553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  skipunits         skip this many code units at start (for brackets and OP_COND)
7576f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cond_depth        depth of nesting for conditional subpatterns
757753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  firstcuptr        place to put the first required code unit
757853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  firstcuflagsptr   place to put the first code unit flags, or a negative number
757953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  reqcuptr          place to put the last required code unit
758053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  reqcuflagsptr     place to put the last required code unit flags, or a negative number
7581f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  bcptr             pointer to the chain of currently open branches
758253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb                points to the data block with tables pointers etc.
7583f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  lengthptr         NULL during the real compile phase
7584f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich                    points to length accumulator during pre-compile phase
7585f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7586f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichReturns:            TRUE on success
7587f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
7588f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7589f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic BOOL
759053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompile_regex(uint32_t options, PCRE2_UCHAR **codeptr, PCRE2_SPTR *ptrptr,
759153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, uint32_t skipunits,
759253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int cond_depth, uint32_t *firstcuptr, int32_t *firstcuflagsptr,
759353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr,
759453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  compile_block *cb, size_t *lengthptr)
7595f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
759653e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR ptr = *ptrptr;
759753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *code = *codeptr;
759853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *last_branch = code;
759953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *start_bracket = code;
760053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *reverse_count = NULL;
7601f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichopen_capitem capitem;
7602f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichint capnumber = 0;
760353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t firstcu, reqcu;
760453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint32_t firstcuflags, reqcuflags;
760553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t branchfirstcu, branchreqcu;
760653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint32_t branchfirstcuflags, branchreqcuflags;
760753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissize_t length;
7608f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichunsigned int orig_bracount;
7609f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichunsigned int max_bracount;
7610f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbranch_chain bc;
7611f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7612f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* If set, call the external function that checks for stack availability. */
7613f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
761453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (cb->cx->stack_guard != NULL &&
761553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    cb->cx->stack_guard(cb->parens_depth, cb->cx->stack_guard_data))
7616f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
761753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  *errorcodeptr= ERR33;
7618f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  return FALSE;
7619f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
7620f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7621f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Miscellaneous initialization */
7622f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7623f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbc.outer = bcptr;
7624f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbc.current_branch = code;
7625f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
762653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfirstcu = reqcu = 0;
762753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfirstcuflags = reqcuflags = REQ_UNSET;
76288366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes
7629f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Accumulate the length for use in the pre-compile phase. Start with the
763053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskislength of the BRA and KET and any extra code units that are required at the
7631f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbeginning. We accumulate in a local variable to save frequent testing of
763253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskislengthptr for NULL. We cannot do this by looking at the value of 'code' at the
7633f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstart and end of each alternative, because compiled items are discarded during
7634f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichthe pre-compile phase so that the work space is not exceeded. */
7635f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
763653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskislength = 2 + 2*LINK_SIZE + skipunits;
7637f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7638f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* WARNING: If the above line is changed for any reason, you must also change
7639f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichthe code that abstracts option settings at the start of the pattern and makes
7640f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichthem global. It tests the value of length for (2 + 2*LINK_SIZE) in the
764153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispre-compile phase to find out whether or not anything has yet been compiled.
7642f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
764353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisIf this is a capturing subpattern, add to the chain of open capturing items
7644f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichso that we can detect them if (*ACCEPT) is encountered. This is also used to
7645f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdetect groups that contain recursive back references to themselves. Note that
7646f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichonly OP_CBRA need be tested here; changing this opcode to one of its variants,
7647f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kraleviche.g. OP_SCBRAPOS, happens later, after the group has been compiled. */
7648f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7649f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichif (*code == OP_CBRA)
7650f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
7651f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  capnumber = GET2(code, 1 + LINK_SIZE);
7652f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  capitem.number = capnumber;
765353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  capitem.next = cb->open_caps;
7654f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  capitem.flag = FALSE;
765553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb->open_caps = &capitem;
7656f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
7657f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7658f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Offset is set zero to mark that this bracket is still open */
7659f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7660f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichPUT(code, 1, 0);
766153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscode += 1 + LINK_SIZE + skipunits;
7662f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7663f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Loop for each alternative branch */
7664f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
766553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisorig_bracount = max_bracount = cb->bracount;
766653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
7667f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichfor (;;)
7668f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
7669f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* For a (?| group, reset the capturing bracket count so that each branch
7670f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  uses the same numbers. */
7671f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
767253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (reset_bracount) cb->bracount = orig_bracount;
7673f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7674f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* Set up dummy OP_REVERSE if lookbehind assertion */
7675f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7676f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  if (lookbehind)
7677f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
7678f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    *code++ = OP_REVERSE;
7679f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    reverse_count = code;
7680f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    PUTINC(code, 0, 0);
7681f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    length += 1 + LINK_SIZE;
7682f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
7683f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7684f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* Now compile the branch; in the pre-compile phase its length gets added
7685f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  into the length. */
7686f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
768753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstcu,
768853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        &branchfirstcuflags, &branchreqcu, &branchreqcuflags, &bc,
768953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cond_depth, cb, (lengthptr == NULL)? NULL : &length))
7690f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
7691f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    *ptrptr = ptr;
7692f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    return FALSE;
7693f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
7694f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7695f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* Keep the highest bracket count in case (?| was used and some branch
7696f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  has fewer than the rest. */
7697f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
769853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (cb->bracount > max_bracount) max_bracount = cb->bracount;
7699f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7700f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* In the real compile phase, there is some post-processing to be done. */
7701f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7702f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  if (lengthptr == NULL)
7703f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
770453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* If this is the first branch, the firstcu and reqcu values for the
7705f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    branch become the values for the regex. */
7706f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7707f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (*last_branch != OP_ALT)
7708f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
770953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      firstcu = branchfirstcu;
771053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      firstcuflags = branchfirstcuflags;
771153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      reqcu = branchreqcu;
771253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      reqcuflags = branchreqcuflags;
7713f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7714f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
771553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* If this is not the first branch, the first char and reqcu have to
7716f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    match the values from all the previous branches, except that if the
771753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    previous value for reqcu didn't have REQ_VARY set, it can still match,
7718f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    and we set REQ_VARY for the regex. */
7719f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7720f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    else
7721f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
772253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* If we previously had a firstcu, but it doesn't match the new branch,
772353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      we have to abandon the firstcu for the regex, but if there was
772453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      previously no reqcu, it takes on the value of the old firstcu. */
7725f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
772653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (firstcuflags != branchfirstcuflags || firstcu != branchfirstcu)
7727f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
772853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (firstcuflags >= 0)
7729f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          {
773053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (reqcuflags < 0)
773153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            {
773253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            reqcu = firstcu;
773353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            reqcuflags = firstcuflags;
773453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis            }
7735f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich          }
773653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        firstcuflags = REQ_NONE;
7737f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7738f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
773953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* If we (now or from before) have no firstcu, a firstcu from the
774053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      branch becomes a reqcu if there isn't a branch reqcu. */
7741f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
774253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (firstcuflags < 0 && branchfirstcuflags >= 0 &&
774353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          branchreqcuflags < 0)
7744f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
774553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        branchreqcu = branchfirstcu;
774653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        branchreqcuflags = branchfirstcuflags;
7747f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7748f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
774953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* Now ensure that the reqcus match */
7750f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
775153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (((reqcuflags & ~REQ_VARY) != (branchreqcuflags & ~REQ_VARY)) ||
775253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          reqcu != branchreqcu)
775353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        reqcuflags = REQ_NONE;
7754f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else
7755f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
775653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        reqcu = branchreqcu;
775753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        reqcuflags |= branchreqcuflags; /* To "or" REQ_VARY */
7758f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7759f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7760f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7761f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* If lookbehind, check that this branch matches a fixed-length string, and
7762f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    put the length into the OP_REVERSE item. Temporarily mark the end of the
776353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    branch with OP_END. If the branch contains OP_RECURSE, the result is
776453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    FFL_LATER (a negative value) because there may be forward references that
776553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    we can't check here. Set a flag to cause another lookbehind check at the
776653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    end. Why not do it all at the end? Because common errors can be picked up
776753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    here and the offset of the problem can be shown. */
7768f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7769f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (lookbehind)
7770f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
7771f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      int fixed_length;
777253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      int count = 0;
7773f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *code = OP_END;
777453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      fixed_length = find_fixedlength(last_branch,  (options & PCRE2_UTF) != 0,
777553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        FALSE, cb, NULL, &count);
777653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (fixed_length == FFL_LATER)
7777f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
777853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cb->check_lookbehind = TRUE;
7779f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7780f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else if (fixed_length < 0)
7781f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
778253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        *errorcodeptr = fixed_length_errors[-fixed_length];
7783f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *ptrptr = ptr;
7784f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        return FALSE;
7785f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7786f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      else
7787f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
778853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (fixed_length > cb->max_lookbehind)
778953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          cb->max_lookbehind = fixed_length;
7790f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        PUT(reverse_count, 0, fixed_length);
7791f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7792f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7793f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
7794f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7795f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* Reached end of expression, either ')' or end of pattern. In the real
7796f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  compile phase, go back through the alternative branches and reverse the chain
7797f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  of offsets, with the field in the BRA item now becoming an offset to the
7798f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  first alternative. If there are no alternatives, it points to the end of the
7799f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  group. The length in the terminating ket is always the length of the whole
7800f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  bracketed item. Return leaving the pointer at the terminating char. */
7801f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7802f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  if (*ptr != CHAR_VERTICAL_LINE)
7803f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
7804f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (lengthptr == NULL)
7805f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
780653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      size_t branch_length = code - last_branch;
7807f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      do
7808f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
780953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        size_t prev_length = GET(last_branch, 1);
7810f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        PUT(last_branch, 1, branch_length);
7811f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        branch_length = prev_length;
7812f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        last_branch -= branch_length;
7813f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7814f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      while (branch_length > 0);
7815f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7816f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7817f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Fill in the ket */
7818f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7819f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    *code = OP_KET;
7820f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    PUT(code, 1, (int)(code - start_bracket));
7821f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    code += 1 + LINK_SIZE;
7822f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7823f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* If it was a capturing subpattern, check to see if it contained any
782453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    recursive back references. If so, we must wrap it in atomic brackets. In
782553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    any event, remove the block from the chain. */
7826f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7827f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (capnumber > 0)
7828f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
782953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (cb->open_caps->flag)
7830f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
7831f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
783253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          CU2BYTES(code - start_bracket));
7833f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *start_bracket = OP_ONCE;
7834f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        code += 1 + LINK_SIZE;
7835f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        PUT(start_bracket, 1, (int)(code - start_bracket));
7836f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *code = OP_KET;
7837f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        PUT(code, 1, (int)(code - start_bracket));
7838f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        code += 1 + LINK_SIZE;
7839f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        length += 2 + 2*LINK_SIZE;
7840f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
784153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      cb->open_caps = cb->open_caps->next;
7842f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7843f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7844f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Retain the highest bracket number, in case resetting was used. */
7845f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
784653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    cb->bracount = max_bracount;
7847f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7848f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    /* Set values to pass back */
7849f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7850f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    *codeptr = code;
7851f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    *ptrptr = ptr;
785253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *firstcuptr = firstcu;
785353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *firstcuflagsptr = firstcuflags;
785453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *reqcuptr = reqcu;
785553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *reqcuflagsptr = reqcuflags;
7856f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (lengthptr != NULL)
7857f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
7858f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (OFLOW_MAX - *lengthptr < length)
7859f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
7860f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        *errorcodeptr = ERR20;
7861f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        return FALSE;
7862f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
7863f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *lengthptr += length;
7864f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
7865f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    return TRUE;
7866f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
7867f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7868f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* Another branch follows. In the pre-compile phase, we can move the code
7869f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  pointer back to where it was for the start of the first branch. (That is,
7870f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  pretend that each branch is the only one.)
7871f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7872f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  In the real compile phase, insert an ALT node. Its length field points back
7873f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  to the previous branch while the bracket remains open. At the end the chain
7874f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  is reversed. It's done like this so that the start of the bracket has a
7875f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  zero offset until it is closed, making it possible to detect recursion. */
7876f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7877f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  if (lengthptr != NULL)
7878f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
787953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    code = *codeptr + 1 + LINK_SIZE + skipunits;
7880f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    length += 1 + LINK_SIZE;
7881f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
7882f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  else
7883f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
7884f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    *code = OP_ALT;
7885f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    PUT(code, 1, (int)(code - last_branch));
7886f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    bc.current_branch = last_branch = code;
7887f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    code += 1 + LINK_SIZE;
7888f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
7889f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
789053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Advance past the vertical bar */
789153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
7892f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  ptr++;
7893f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
7894f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Control never reaches here */
7895f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
7896f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7897f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7898f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7899f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
790053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*          Check for anchored pattern            *
7901f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
7902f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7903f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Try to find out if this is an anchored regular expression. Consider each
7904f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichalternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket
7905f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichall of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then
7906f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichit's anchored. However, if this is a multiline pattern, then only OP_SOD will
7907f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbe found, because ^ generates OP_CIRCM in that mode.
7908f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7909f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichWe can also consider a regex to be anchored if OP_SOM starts all its branches.
7910f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichThis is the code for \G, which means "match at start of match position, taking
7911f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichinto account the match offset".
7912f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7913f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichA branch is also implicitly anchored if it starts with .* and DOTALL is set,
7914f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbecause that will try the rest of the pattern at all possible matching points,
7915f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichso there is no point trying again.... er ....
7916f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7917f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich.... except when the .* appears inside capturing parentheses, and there is a
7918f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichsubsequent back reference to those parentheses. We haven't enough information
7919f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichto catch that case precisely.
7920f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7921f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichAt first, the best we could do was to detect when .* was in capturing brackets
7922f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichand the highest back reference was greater than or equal to that level.
7923f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichHowever, by keeping a bitmap of the first 31 back references, we can catch some
7924f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichof the more common cases more precisely.
7925f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7926f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich... A second exception is when the .* appears inside an atomic group, because
7927f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichthis prevents the number of characters it matches from being adjusted.
7928f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7929f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments:
793053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  code           points to start of the compiled pattern
7931f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  bracket_map    a bitmap of which brackets we are inside while testing; this
793253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                   handles up to substring 31; after that we just have to take
793353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                   the less precise approach
793453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb             points to the compile data block
7935f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  atomcount      atomic group level
7936f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7937f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichReturns:     TRUE if the pattern is anchored, FALSE otherwise
7938f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
7939f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7940f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic BOOL
794153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisis_anchored(register PCRE2_SPTR code, unsigned int bracket_map,
794253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  compile_block *cb, int atomcount)
7943f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
7944f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdo {
794553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis   PCRE2_SPTR scode = first_significant_code(
7946f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     code + PRIV(OP_lengths)[*code], FALSE);
7947f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   register int op = *scode;
7948f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7949f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /* Non-capturing brackets */
7950f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7951f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   if (op == OP_BRA  || op == OP_BRAPOS ||
7952f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       op == OP_SBRA || op == OP_SBRAPOS)
7953f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     {
795453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE;
7955f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     }
7956f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7957f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /* Capturing brackets */
7958f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7959f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   else if (op == OP_CBRA  || op == OP_CBRAPOS ||
7960f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            op == OP_SCBRA || op == OP_SCBRAPOS)
7961f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     {
7962f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     int n = GET2(scode, 1+LINK_SIZE);
796353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
796453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     if (!is_anchored(scode, new_map, cb, atomcount)) return FALSE;
7965f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     }
7966f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7967f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /* Positive forward assertions and conditions */
7968f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7969f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   else if (op == OP_ASSERT || op == OP_COND)
7970f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     {
797153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE;
7972f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     }
7973f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7974f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /* Atomic groups */
7975f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7976f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   else if (op == OP_ONCE || op == OP_ONCE_NC)
7977f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     {
797853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     if (!is_anchored(scode, bracket_map, cb, atomcount + 1))
7979f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       return FALSE;
7980f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     }
7981f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7982f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
7983f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   it isn't in brackets that are or may be referenced or inside an atomic
798453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis   group. There is also an option that disables auto-anchoring. */
7985f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7986f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
7987f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich             op == OP_TYPEPOSSTAR))
7988f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     {
798953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     if (scode[1] != OP_ALLANY || (bracket_map & cb->backref_map) != 0 ||
799053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis         atomcount > 0 || cb->had_pruneorskip ||
799153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis         (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)
7992f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       return FALSE;
7993f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     }
7994f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7995f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /* Check for explicit anchoring */
7996f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7997f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE;
7998f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
7999f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   code += GET(code, 1);
8000f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   }
8001f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichwhile (*code == OP_ALT);   /* Loop for each alternative */
8002f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichreturn TRUE;
8003f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
8004f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8005f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8006f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8007f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
8008f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*         Check for starting with ^ or .*        *
8009f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
8010f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8011f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* This is called to find out if every branch starts with ^ or .* so that
8012f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich"first char" processing can be done to speed things up in multiline
8013f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichmatching and for non-DOTALL patterns that start with .* (which must start at
8014f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichthe beginning or after \n). As in the case of is_anchored() (see above), we
8015f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichhave to take account of back references to capturing brackets that contain .*
8016f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbecause in that case we can't make the assumption. Also, the appearance of .*
8017f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichinside atomic brackets or in a pattern that contains *PRUNE or *SKIP does not
8018f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichcount, because once again the assumption no longer holds.
8019f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8020f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments:
802153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  code           points to start of the compiled pattern or a group
8022f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  bracket_map    a bitmap of which brackets we are inside while testing; this
802353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                   handles up to substring 31; after that we just have to take
802453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                   the less precise approach
802553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb             points to the compile data
8026f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  atomcount      atomic group level
8027f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8028f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichReturns:         TRUE or FALSE
8029f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
8030f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8031f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic BOOL
803253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisis_startline(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb,
803353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int atomcount)
8034f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
8035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdo {
803653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis   PCRE2_SPTR scode = first_significant_code(
8037f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     code + PRIV(OP_lengths)[*code], FALSE);
8038f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   register int op = *scode;
8039f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8040f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /* If we are at the start of a conditional assertion group, *both* the
8041f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   conditional assertion *and* what follows the condition must satisfy the test
8042f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   for start of line. Other kinds of condition fail. Note that there may be an
8043f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   auto-callout at the start of a condition. */
8044f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8045f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   if (op == OP_COND)
8046f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     {
8047f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     scode += 1 + LINK_SIZE;
804853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
8049f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT];
805053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       else if (*scode == OP_CALLOUT_STR) scode += GET(scode, 1 + 2*LINK_SIZE);
805153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
8052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     switch (*scode)
8053f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       {
8054f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       case OP_CREF:
8055f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       case OP_DNCREF:
8056f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       case OP_RREF:
8057f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       case OP_DNRREF:
80588366e8beecf85b8e61b5c1a1369666db7a292eaeElliott Hughes       case OP_FAIL:
805953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       case OP_FALSE:
806053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       case OP_TRUE:
8061f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       return FALSE;
8062f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8063f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       default:     /* Assertion */
806453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       if (!is_startline(scode, bracket_map, cb, atomcount)) return FALSE;
8065f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       do scode += GET(scode, 1); while (*scode == OP_ALT);
8066f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       scode += 1 + LINK_SIZE;
8067f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       break;
8068f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       }
8069f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     scode = first_significant_code(scode, FALSE);
8070f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     op = *scode;
8071f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     }
8072f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8073f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /* Non-capturing brackets */
8074f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8075f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   if (op == OP_BRA  || op == OP_BRAPOS ||
8076f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       op == OP_SBRA || op == OP_SBRAPOS)
8077f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     {
807853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     if (!is_startline(scode, bracket_map, cb, atomcount)) return FALSE;
8079f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     }
8080f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8081f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /* Capturing brackets */
8082f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8083f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   else if (op == OP_CBRA  || op == OP_CBRAPOS ||
8084f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich            op == OP_SCBRA || op == OP_SCBRAPOS)
8085f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     {
8086f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     int n = GET2(scode, 1+LINK_SIZE);
808753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
808853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     if (!is_startline(scode, new_map, cb, atomcount)) return FALSE;
8089f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     }
8090f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8091f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /* Positive forward assertions */
8092f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8093f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   else if (op == OP_ASSERT)
8094f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     {
809553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     if (!is_startline(scode, bracket_map, cb, atomcount)) return FALSE;
8096f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     }
8097f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8098f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /* Atomic brackets */
8099f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8100f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   else if (op == OP_ONCE || op == OP_ONCE_NC)
8101f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     {
810253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     if (!is_startline(scode, bracket_map, cb, atomcount + 1)) return FALSE;
8103f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     }
8104f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8105f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /* .* means "start at start or after \n" if it isn't in atomic brackets or
8106f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   brackets that may be referenced, as long as the pattern does not contain
8107f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   *PRUNE or *SKIP, because these break the feature. Consider, for example,
8108f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /.*?a(*PRUNE)b/ with the subject "aab", which matches "ab", i.e. not at the
810953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis   start of a line. There is also an option that disables this optimization. */
8110f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8111f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR || op == OP_TYPEPOSSTAR)
8112f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     {
811353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     if (scode[1] != OP_ANY || (bracket_map & cb->backref_map) != 0 ||
811453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis         atomcount > 0 || cb->had_pruneorskip ||
811553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis         (cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)
8116f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       return FALSE;
8117f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     }
8118f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8119f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /* Check for explicit circumflex; anything else gives a FALSE result. Note
8120f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   in particular that this includes atomic brackets OP_ONCE and OP_ONCE_NC
8121f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   because the number of characters matched by .* cannot be adjusted inside
8122f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   them. */
8123f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8124f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   else if (op != OP_CIRC && op != OP_CIRCM) return FALSE;
8125f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8126f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   /* Move on to the next alternative */
8127f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8128f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   code += GET(code, 1);
8129f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   }
8130f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichwhile (*code == OP_ALT);  /* Loop for each alternative */
8131f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichreturn TRUE;
8132f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
8133f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8134f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8135f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8136f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
813753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*    Check for asserted fixed first code unit    *
8138f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
8139f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
814053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* During compilation, the "first code unit" settings from forward assertions
814153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisare discarded, because they can cause conflicts with actual literals that
814253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfollow. However, if we end up without a first code unit setting for an
814353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisunanchored pattern, it is worth scanning the regex to see if there is an
814453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisinitial asserted first code unit. If all branches start with the same asserted
814553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscode unit, or with a non-conditional bracket all of whose alternatives start
814653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswith the same asserted code unit (recurse ad lib), then we return that code
814753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisunit, with the flags set to zero or REQ_CASELESS; otherwise return zero with
814853e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisREQ_NONE in the flags.
8149f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8150f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments:
815153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  code       points to start of compiled pattern
815253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  flags      points to the first code unit flags
8153f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  inassert   TRUE if in an assertion
8154f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
815553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisReturns:     the fixed first code unit, or 0 with REQ_NONE in flags
8156f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
8157f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
815853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstatic uint32_t
815953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfind_firstassertedcu(PCRE2_SPTR code, int32_t *flags, BOOL inassert)
8160f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
816153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisregister uint32_t c = 0;
8162f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichint cflags = REQ_NONE;
8163f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8164f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*flags = REQ_NONE;
8165f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdo {
816653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis   uint32_t d;
8167f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   int dflags;
8168f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
8169f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich             *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
817053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis   PCRE2_SPTR scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);
817153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis   register PCRE2_UCHAR op = *scode;
8172f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8173f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   switch(op)
8174f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     {
8175f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     default:
8176f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     return 0;
8177f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8178f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_BRA:
8179f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_BRAPOS:
8180f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_CBRA:
8181f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_SCBRA:
8182f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_CBRAPOS:
8183f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_SCBRAPOS:
8184f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_ASSERT:
8185f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_ONCE:
8186f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_ONCE_NC:
818753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     d = find_firstassertedcu(scode, &dflags, op == OP_ASSERT);
8188f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     if (dflags < 0)
8189f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       return 0;
819053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     if (cflags < 0) { c = d; cflags = dflags; }
819153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       else if (c != d || cflags != dflags) return 0;
8192f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     break;
8193f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8194f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_EXACT:
8195f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     scode += IMM2_SIZE;
8196f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     /* Fall through */
8197f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8198f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_CHAR:
8199f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_PLUS:
8200f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_MINPLUS:
8201f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_POSPLUS:
8202f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     if (!inassert) return 0;
8203f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     if (cflags < 0) { c = scode[1]; cflags = 0; }
8204f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       else if (c != scode[1]) return 0;
8205f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     break;
8206f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8207f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_EXACTI:
8208f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     scode += IMM2_SIZE;
8209f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     /* Fall through */
8210f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8211f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_CHARI:
8212f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_PLUSI:
8213f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_MINPLUSI:
8214f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     case OP_POSPLUSI:
8215f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     if (!inassert) return 0;
8216f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
8217f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       else if (c != scode[1]) return 0;
8218f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     break;
8219f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich     }
8220f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8221f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   code += GET(code, 1);
8222f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich   }
8223f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichwhile (*code == OP_ALT);
8224f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8225f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*flags = cflags;
8226f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichreturn c;
8227f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
8228f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8229f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8230f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8231f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
8232f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*     Add an entry to the name/number table      *
8233f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
8234f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8235f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* This function is called between compiling passes to add an entry to the
8236f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichname/number table, maintaining alphabetical order. Checking for permitted
8237f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichand forbidden duplicates has already been done.
8238f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8239f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments:
824053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb           the compile data block
8241f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  name         the name to add
8242f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  length       the length of the name
8243f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  groupno      the group number
8244f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8245f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichReturns:       nothing
8246f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
8247f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8248f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichstatic void
824953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisadd_name_to_table(compile_block *cb, PCRE2_SPTR name, int length,
8250f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  unsigned int groupno)
8251f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
8252f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichint i;
825353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *slot = cb->name_table;
8254f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
825553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfor (i = 0; i < cb->names_found; i++)
8256f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
825753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int crc = memcmp(name, slot+IMM2_SIZE, CU2BYTES(length));
8258f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  if (crc == 0 && slot[IMM2_SIZE+length] != 0)
8259f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    crc = -1; /* Current name is a substring */
8260f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8261f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* Make space in the table and break the loop for an earlier name. For a
8262f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  duplicate or later name, carry on. We do this for duplicates so that in the
8263f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  simple case (when ?(| is not used) they are in order of their numbers. In all
8264f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  cases they are in the order in which they appear in the pattern. */
8265f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8266f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  if (crc < 0)
8267f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
826853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    memmove(slot + cb->name_entry_size, slot,
826953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      CU2BYTES((cb->names_found - i) * cb->name_entry_size));
8270f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
8271f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
8272f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8273f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* Continue the loop for a later or duplicate name */
8274f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
827553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  slot += cb->name_entry_size;
8276f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8277f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8278f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichPUT2(slot, 0, groupno);
827953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismemcpy(slot + IMM2_SIZE, name, CU2BYTES(length));
828053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb->names_found++;
828153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
828253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Add a terminating zero and fill the rest of the slot with zeroes so that
828353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe memory is all initialized. Otherwise valgrind moans about uninitialized
828453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismemory when saving serialized compiled patterns. */
828553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
828653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismemset(slot + IMM2_SIZE + length, 0,
828753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  CU2BYTES(cb->name_entry_size - length - IMM2_SIZE));
8288f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
8289f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8290f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8291f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8292f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/*************************************************
829353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*     External function to compile a pattern     *
8294f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*************************************************/
8295f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
829653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* This function reads a regular expression in the form of a string and returns
829753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisa pointer to a block of store holding a compiled version of the expression.
8298f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8299f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichArguments:
8300f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  pattern       the regular expression
830153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  patlen        the length of the pattern, or PCRE2_ZERO_TERMINATED
830253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  options       option bits
830353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  errorptr      pointer to errorcode
830453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  erroroffset   pointer to error offset
830553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ccontext      points to a compile context or is NULL
8306f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8307f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichReturns:        pointer to compiled data block, or NULL on error,
830853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis                with errorcode and erroroffset set
8309f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*/
8310f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
831153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION
831253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
831353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis   int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
8314f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich{
831553e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisBOOL utf;                               /* Set TRUE for UTF mode */
831653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispcre2_real_code *re = NULL;             /* What we will return */
831753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompile_block cb;                       /* "Static" compile-time data */
831853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisconst uint8_t *tables;                  /* Char tables base pointer */
8319f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
832053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *code;                      /* Current pointer in compiled code */
832153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR codestart;                   /* Start of compiled code */
832253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_SPTR ptr;                         /* Current pointer in pattern */
8323f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
832453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissize_t length = 1;                      /* Allow or final END opcode */
832553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissize_t usedlength;                      /* Actual length used */
832653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissize_t re_blocksize;                    /* Size of memory block */
832753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
832853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint32_t firstcuflags, reqcuflags;       /* Type of first/req code unit */
832953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t firstcu, reqcu;                /* Value of first/req code unit */
833053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t setflags = 0;                  /* NL and BSR set flags */
833153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
833253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t skipatstart;                   /* When checking (*UTF) etc */
833353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t limit_match = UINT32_MAX;      /* Unset match limits */
833453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t limit_recursion = UINT32_MAX;
8335f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
833653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint newline = 0;                        /* Unset; can be set by the pattern */
833753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint bsr = 0;                            /* Unset; can be set by the pattern */
833853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisint errorcode = 0;                      /* Initialize to avoid compiler warn */
833953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
834053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Comments at the head of this file explain about these variables. */
834153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
834253e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *copied_pattern = NULL;
834353e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR stack_copied_pattern[COPIED_PATTERN_SIZE];
8344f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichnamed_group named_groups[NAMED_GROUP_LIST_SIZE];
8345f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
834653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* The workspace is used in different ways in the different compiling phases.
834753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisIt needs to be 16-bit aligned for the preliminary group scan, and 32-bit
834853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisaligned for the group information cache. */
834953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
835053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisuint32_t c32workspace[C32_WORK_SIZE];
835153e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_UCHAR *cworkspace = (PCRE2_UCHAR *)c32workspace;
835253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
8353f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
835453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* -------------- Check arguments and set up the pattern ----------------- */
8355f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
835653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* There must be error code and offset pointers. */
8357f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
835853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (errorptr == NULL || erroroffset == NULL) return NULL;
835953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*errorptr = ERR0;
836053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*erroroffset = 0;
836153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
836253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* There must be a pattern! */
836353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
836453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (pattern == NULL)
8365f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
836653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  *errorptr = ERR16;
8367f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  return NULL;
8368f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8369f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
837053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Check that all undefined public option bits are zero. */
8371f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
837253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
8373f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
837453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  *errorptr = ERR17;
837553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  return NULL;
8376f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8377f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
837853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* A NULL compile context means "use a default context" */
8379f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
838053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (ccontext == NULL)
838153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ccontext = (pcre2_compile_context *)(&PRIV(default_compile_context));
8382f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
838353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* A zero-terminated pattern is indicated by the special length value
838453e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisPCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero,
838553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisto ensure that it is always possible to look one code unit beyond the end of
838653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe pattern's characters. In both cases, check whether the pattern is overlong. */
8387f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
838853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (patlen == PCRE2_ZERO_TERMINATED)
838953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
839053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  patlen = PRIV(strlen)(pattern);
839153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (patlen > ccontext->max_pattern_length)
839253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
839353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *errorptr = ERR88;
839453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    return NULL;
839553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
839653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
839753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiselse
8398f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
839953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (patlen > ccontext->max_pattern_length)
840053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
840153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    *errorptr = ERR88;
840253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    return NULL;
840353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
840453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (patlen < COPIED_PATTERN_SIZE)
840553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    copied_pattern = stack_copied_pattern;
840653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  else
840753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
840853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    copied_pattern = ccontext->memctl.malloc(CU2BYTES(patlen + 1),
840953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      ccontext->memctl.memory_data);
841053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (copied_pattern == NULL)
841153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
841253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      *errorptr = ERR21;
841353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      return NULL;
841453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
841553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
841653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  memcpy(copied_pattern, pattern, CU2BYTES(patlen));
841753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  copied_pattern[patlen] = 0;
841853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  pattern = copied_pattern;
8419f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8420f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
842153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* ------------ Initialize the "static" compile data -------------- */
842253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
842353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskistables = (ccontext->tables != NULL)? ccontext->tables : PRIV(default_tables);
842453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
842553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.lcc = tables + lcc_offset;          /* Individual */
842653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.fcc = tables + fcc_offset;          /*   character */
842753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.cbits = tables + cbits_offset;      /*      tables */
842853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.ctypes = tables + ctypes_offset;
842953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
843053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.assert_depth = 0;
843153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.bracount = cb.final_bracount = 0;
843253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.cx = ccontext;
843353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.dupnames = FALSE;
843453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.end_pattern = pattern + patlen;
843553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.nestptr[0] = cb.nestptr[1] = NULL;
843653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.external_flags = 0;
843753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.external_options = options;
843853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.groupinfo = c32workspace;
843953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.had_recurse = FALSE;
844053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.iscondassert = FALSE;
844153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.max_lookbehind = 0;
844253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.name_entry_size = 0;
844353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.name_table = NULL;
844453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.named_groups = named_groups;
844553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.named_group_list_size = NAMED_GROUP_LIST_SIZE;
844653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.names_found = 0;
844753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.open_caps = NULL;
844853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.parens_depth = 0;
844953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.req_varyopt = 0;
845053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.start_code = cworkspace;
845153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.start_pattern = pattern;
845253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.start_workspace = cworkspace;
845353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.workspace_size = COMPILE_WORK_SIZE;
845453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
845553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Maximum back reference and backref bitmap. The bitmap records up to 31 back
845653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreferences to help in deciding whether (.*) can be treated as anchored or not.
845753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis*/
845853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
845953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.top_backref = 0;
846053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.backref_map = 0;
8461f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
846253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* --------------- Start looking at the pattern --------------- */
8463f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
846453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Check for global one-time option settings at the start of the pattern, and
846553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisremember the offset to the actual regex. */
8466f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
846753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisptr = pattern;
846853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisskipatstart = 0;
8469f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8470f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichwhile (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
8471f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       ptr[skipatstart+1] == CHAR_ASTERISK)
8472f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
847353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  unsigned int i;
847453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  for (i = 0; i < sizeof(pso_list)/sizeof(pso); i++)
847553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
847653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    pso *p = pso_list + i;
8477f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
847853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (PRIV(strncmp_c8)(ptr+skipatstart+2, (char *)(p->name), p->length) == 0)
847953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
848053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      uint32_t c, pp;
8481f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
848253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      skipatstart += p->length + 2;
848353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      switch(p->type)
848453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
848553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        case PSO_OPT:
848653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        cb.external_options |= p->value;
848753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
8488f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
848953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        case PSO_FLG:
849053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        setflags |= p->value;
849153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
8492f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
849353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        case PSO_NL:
849453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        newline = p->value;
849553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        setflags |= PCRE2_NL_SET;
849653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
8497f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
849853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        case PSO_BSR:
849953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        bsr = p->value;
850053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        setflags |= PCRE2_BSR_SET;
850153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
850253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
850353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        case PSO_LIMM:
850453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        case PSO_LIMR:
850553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        c = 0;
850653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        pp = skipatstart;
850753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (!IS_DIGIT(ptr[pp]))
850853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
850953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          errorcode = ERR60;
851053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr += pp;
851153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          goto HAD_ERROR;
851253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
851353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        while (IS_DIGIT(ptr[pp]))
851453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
851553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          if (c > UINT32_MAX / 10 - 1) break;   /* Integer overflow */
851653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          c = c*10 + (ptr[pp++] - CHAR_0);
851753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
851853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS)
851953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
852053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          errorcode = ERR60;
852153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          ptr += pp;
852253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          goto HAD_ERROR;
852353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
852453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (p->type == PSO_LIMM) limit_match = c;
852553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          else limit_recursion = c;
852653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        skipatstart += pp - skipatstart;
852753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        break;
852853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
852953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      break;   /* Out of the table scan loop */
8530f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
8531f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
853253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (i >= sizeof(pso_list)/sizeof(pso)) break;   /* Out of pso loop */
8533f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8534f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
853553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* End of pattern-start options; advance to start of real regex. */
8536f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
853753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisptr += skipatstart;
8538f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
853953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Can't support UTF or UCP unless PCRE2 has been compiled with UTF support. */
854053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
854153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifndef SUPPORT_UNICODE
854253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((cb.external_options & (PCRE2_UTF|PCRE2_UCP)) != 0)
8543f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
8544f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  errorcode = ERR32;
854553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  goto HAD_ERROR;
8546f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8547f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
8548f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
854953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Check UTF. We have the original options in 'options', with that value as
855053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismodified by (*UTF) etc in cb.external_options. */
8551f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
855253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisutf = (cb.external_options & PCRE2_UTF) != 0;
855353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (utf)
8554f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
855553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if ((options & PCRE2_NEVER_UTF) != 0)
855653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
855753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    errorcode = ERR74;
855853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    goto HAD_ERROR;
855953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
856053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if ((options & PCRE2_NO_UTF_CHECK) == 0 &&
856153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       (errorcode = PRIV(valid_utf)(pattern, patlen, erroroffset)) != 0)
856253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    goto HAD_UTF_ERROR;
8563f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8564f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
856553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Check UCP lockout. */
8566f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
856753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((cb.external_options & (PCRE2_UCP|PCRE2_NEVER_UCP)) ==
856853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    (PCRE2_UCP|PCRE2_NEVER_UCP))
8569f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
857053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  errorcode = ERR75;
857153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  goto HAD_ERROR;
8572f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8573f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
857453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Process the BSR setting. */
8575f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
857653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (bsr == 0) bsr = ccontext->bsr_convention;
8577f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
857853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Process the newline setting. */
857953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
858053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (newline == 0) newline = ccontext->newline_convention;
858153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.nltype = NLTYPE_FIXED;
858253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisswitch(newline)
8583f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
858453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  case PCRE2_NEWLINE_CR:
858553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb.nllen = 1;
858653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb.nl[0] = CHAR_CR;
858753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  break;
858853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
858953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  case PCRE2_NEWLINE_LF:
859053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb.nllen = 1;
859153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb.nl[0] = CHAR_NL;
859253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  break;
859353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
859453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  case PCRE2_NEWLINE_CRLF:
859553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb.nllen = 2;
859653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb.nl[0] = CHAR_CR;
859753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb.nl[1] = CHAR_NL;
859853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  break;
859953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
860053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  case PCRE2_NEWLINE_ANY:
860153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb.nltype = NLTYPE_ANY;
860253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  break;
860353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
860453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  case PCRE2_NEWLINE_ANYCRLF:
860553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb.nltype = NLTYPE_ANYCRLF;
860653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  break;
860753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
860853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  default:
860953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  errorcode = ERR56;
861053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  goto HAD_ERROR;
8611f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
861253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
861353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Before we do anything else, do a pre-scan of the pattern in order to
861453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisdiscover the named groups and their numerical equivalents, so that this
861553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisinformation is always available for the remaining processing. */
861653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
861753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiserrorcode = scan_for_captures(&ptr, cb.external_options, &cb);
861853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (errorcode != 0) goto HAD_ERROR;
861953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
862053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* For obscure debugging this code can be enabled. */
862153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
862253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if 0
8623f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
862453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int i;
862553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  named_group *ng = cb.named_groups;
862653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  fprintf(stderr, "+++Captures: %d\n", cb.final_bracount);
862753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  for (i = 0; i < cb.names_found; i++, ng++)
8628f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
862953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    fprintf(stderr, "+++%3d %.*s\n", ng->number, ng->length, ng->name);
8630f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
8631f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
863253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
8633f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
863453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Reset current bracket count to zero and current pointer to the start of the
863553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispattern. */
8636f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
863753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.bracount = 0;
863853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisptr = pattern + skipatstart;
8639f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
864053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Pretend to compile the pattern while actually just accumulating the amount
864153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisof memory required in the 'length' variable. This behaviour is triggered by
864253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispassing a non-NULL final argument to compile_regex(). We pass a block of
864353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisworkspace (cworkspace) for it to compile parts of the pattern into; the
864453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompiled code is discarded when it is no longer needed, so hopefully this
864553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisworkspace will never overflow, though there is a test for its doing so.
8646f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
864753e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisOn error, errorcode will be set non-zero, so we don't need to look at the
864853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisresult of the function. The initial options have been put into the cb block so
864953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthat they can be changed if an option setting is found within the regex right
865053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisat the beginning. Bringing initial option settings outside can help speed up
865153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisstarting point checks. We still have to pass a separate options variable (the
865253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfirst argument) because that may change as the pattern is processed. */
8653f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8654f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichcode = cworkspace;
8655f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*code = OP_BRA;
8656f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
865753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis(void)compile_regex(cb.external_options, &code, &ptr, &errorcode, FALSE,
865853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  FALSE, 0, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL,
865953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  &cb, &length);
8660f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
866153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (errorcode != 0) goto HAD_ERROR;
8662f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichif (length > MAX_PATTERN_SIZE)
8663f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
8664f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  errorcode = ERR20;
866553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  goto HAD_ERROR;
8666f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8667f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
866853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Compute the size of, and then get and initialize, the data block for storing
866953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe compiled pattern and names table. Integer overflow should no longer be
867053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispossible because nowadays we limit the maximum value of cb.names_found and
867153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.name_entry_size. */
8672f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
867353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre_blocksize = sizeof(pcre2_real_code) +
867453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  CU2BYTES(length + cb.names_found * cb.name_entry_size);
867553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre = (pcre2_real_code *)
867653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ccontext->memctl.malloc(re_blocksize, ccontext->memctl.memory_data);
8677f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichif (re == NULL)
8678f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
8679f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  errorcode = ERR21;
868053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  goto HAD_ERROR;
8681f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8682f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
868353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->memctl = ccontext->memctl;
868453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->tables = tables;
868553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->executable_jit = NULL;
868653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismemset(re->start_bitmap, 0, 32 * sizeof(uint8_t));
868753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->blocksize = re_blocksize;
8688f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichre->magic_number = MAGIC_NUMBER;
868953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->compile_options = options;
869053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->overall_options = cb.external_options;
869153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags;
8692f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichre->limit_match = limit_match;
8693f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichre->limit_recursion = limit_recursion;
869453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->first_codeunit = 0;
869553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->last_codeunit = 0;
869653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->bsr_convention = bsr;
869753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->newline_convention = newline;
869853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->max_lookbehind = 0;
869953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->minlength = 0;
870053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->top_bracket = 0;
870153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->top_backref = 0;
870253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->name_entry_size = cb.name_entry_size;
870353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->name_count = cb.names_found;
870453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
870553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* The basic block is immediately followed by the name table, and the compiled
870653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscode follows after that. */
870753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
870853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscodestart = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code)) +
870953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  re->name_entry_size * re->name_count;
871053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
871153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Workspace is needed to remember information about numbered groups: whether a
871253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisgroup can match an empty string and what its fixed length is. This is done to
871353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisavoid the possibility of recursive references causing very long compile times
871453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswhen checking these features. Unnumbered groups do not have this exposure since
871553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthey cannot be referenced. We use an indexed vector for this purpose. If there
871653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisare sufficiently few groups, it can be the c32workspace vector, as set up
871753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisabove. Otherwise we have to get/free a special vector. The vector must be
871853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisinitialized to zero. */
871953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
872053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (cb.final_bracount >= C32_WORK_SIZE)
872153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
872253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb.groupinfo = ccontext->memctl.malloc(
872353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    (cb.final_bracount + 1)*sizeof(uint32_t), ccontext->memctl.memory_data);
872453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (cb.groupinfo == NULL)
872553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
872653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    errorcode = ERR21;
872753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    goto HAD_ERROR;
872853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
872953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
873053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismemset(cb.groupinfo, 0, (cb.final_bracount + 1) * sizeof(uint32_t));
873153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
873253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Update the compile data block for the actual compile. The starting points of
873353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe name/number translation table and of the code are passed around in the
873453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompile data block. The start/end pattern and initial options are already set
873553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfrom the pre-compile phase, as is the name_entry_size field. Reset the bracket
873653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscount and the names_found field. */
873753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
873853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.parens_depth = 0;
873953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.assert_depth = 0;
874053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.bracount = 0;
874153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.max_lookbehind = 0;
874253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.name_table = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code));
874353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.start_code = codestart;
874453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.iscondassert = FALSE;
874553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.req_varyopt = 0;
874653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.had_accept = FALSE;
874753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.had_pruneorskip = FALSE;
874853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.check_lookbehind = FALSE;
874953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscb.open_caps = NULL;
8750f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8751f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* If any named groups were found, create the name/number table from the list
875253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscreated in the pre-pass. */
8753f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
875453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (cb.names_found > 0)
8755f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
875653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int i = cb.names_found;
875753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  named_group *ng = cb.named_groups;
875853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  cb.names_found = 0;
8759f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  for (; i > 0; i--, ng++)
876053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    add_name_to_table(&cb, ng->name, ng->length, ng->number);
8761f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8762f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8763f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Set up a starting, non-extracting bracket, then compile the expression. On
8764f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevicherror, errorcode will be set non-zero, so we don't need to look at the result
8765f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichof the function here. */
8766f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
876753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisptr = pattern + skipatstart;
876853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscode = (PCRE2_UCHAR *)codestart;
8769f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*code = OP_BRA;
877053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis(void)compile_regex(re->overall_options, &code, &ptr, &errorcode, FALSE, FALSE,
877153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis   0, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, NULL);
8772f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
877353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->top_bracket = cb.bracount;
877453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->top_backref = cb.top_backref;
877553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisre->max_lookbehind = cb.max_lookbehind;
8776f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
877753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (cb.had_accept)
877853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
877953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  reqcu = 0;              /* Must disable after (*ACCEPT) */
878053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  reqcuflags = REQ_NONE;
878153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
8782f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
87838b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskis/* Fill in the final opcode and check for disastrous overflow. If no overflow,
87848b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisbut the estimated length exceeds the really used length, adjust the value of
87858b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskisre->blocksize, and if valgrind support is configured, mark the extra allocated
87868b979b2abae173bb836d8e85a842cfd00447d4beJanis Danisevskismemory as unaddressable, so that any out-of-bound reads can be detected. */
8787f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8788f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich*code++ = OP_END;
878953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisusedlength = code - codestart;
879053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (usedlength > length) errorcode = ERR23; else
879153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
879253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  re->blocksize -= CU2BYTES(length - usedlength);
8793f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#ifdef SUPPORT_VALGRIND
879453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  VALGRIND_MAKE_MEM_NOACCESS(code, CU2BYTES(length - usedlength));
8795f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
879653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
879753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
879853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Scan the pattern for recursion/subroutine calls and convert the group
879953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnumbers into offsets. Maintain a small cache so that repeated groups containing
880053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisrecursions are efficiently handled. */
8801f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
880253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#define RSCAN_CACHE_SIZE 8
8803f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
880453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (errorcode == 0 && cb.had_recurse)
8805f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
880653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  PCRE2_UCHAR *rcode;
880753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  PCRE2_SPTR rgroup;
880853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int ccount = 0;
880953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int start = RSCAN_CACHE_SIZE;
881053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  recurse_cache rc[RSCAN_CACHE_SIZE];
881153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
881253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  for (rcode = (PCRE2_UCHAR *)find_recurse(codestart, utf);
881353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       rcode != NULL;
881453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       rcode = (PCRE2_UCHAR *)find_recurse(rcode + 1 + LINK_SIZE, utf))
8815f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
881653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    int i, p, recno;
88170ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
881853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    recno = (int)GET(rcode, 1);
881953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (recno == 0) rgroup = codestart; else
88200ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes      {
882153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_SPTR search_from = codestart;
882253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      rgroup = NULL;
882353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      for (i = 0, p = start; i < ccount; i++, p = (p + 1) & 7)
882453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
882553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (recno == rc[p].recno)
882653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
882753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          rgroup = rc[p].group;
882853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          break;
882953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
88300ea9883633b5d1fcfc777d57427bbf9b0098397eElliott Hughes
883153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        /* Group n+1 must always start to the right of group n, so we can save
883253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        search time below when the new group number is greater than any of the
883353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        previously found groups. */
883453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
883553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (recno > rc[p].recno) search_from = rc[p].group;
883653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
883753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
883853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (rgroup == NULL)
883953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        {
884053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        rgroup = PRIV(find_bracket)(search_from, utf, recno);
884153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (rgroup == NULL)
884253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          {
884353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          errorcode = ERR53;
884453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          break;
884553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis          }
884653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (--start < 0) start = RSCAN_CACHE_SIZE - 1;
884753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        rc[start].recno = recno;
884853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        rc[start].group = rgroup;
884953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (ccount < RSCAN_CACHE_SIZE) ccount++;
885053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        }
8851f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
885253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
885353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    PUT(rcode, 1, rgroup - codestart);
8854f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
8855f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8856f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
885753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* In rare debugging situations we sometimes need to look at the compiled code
885853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisat this stage. */
8859f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
886053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#ifdef CALL_PRINTINT
886153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispcre2_printint(re, stderr, TRUE);
886253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfprintf(stderr, "Length=%lu Used=%lu\n", length, usedlength);
886353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#endif
8864f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
886553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* After a successful compile, give an error if there's a back reference to a
886653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisnon-existent capturing subpattern. Then, unless disabled, check whether any
886753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissingle character iterators can be auto-possessified. The function overwrites
886853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe appropriate opcode values, so the type of the pointer must be cast. NOTE:
886953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe intermediate variable "temp" is used in this code because at least one
887053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscompiler gives a warning about loss of "const" attribute if the cast
887153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis(PCRE2_UCHAR *)codestart is used directly in the function call. */
8872f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
887353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (errorcode == 0)
8874f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
887553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (re->top_backref > re->top_bracket) errorcode = ERR15;
887653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  else if ((re->overall_options & PCRE2_NO_AUTO_POSSESS) == 0)
887753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
887853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart;
887953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80;
888053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
8881f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8882f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8883f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* If there were any lookbehind assertions that contained OP_RECURSE
8884f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich(recursions or subroutine calls), a flag is set for them to be checked here,
8885f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichbecause they may contain forward references. Actual recursions cannot be fixed
8886f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichlength, but subroutine calls can. It is done like this so that those without
8887f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick KralevichOP_RECURSE that are not fixed length get a diagnostic with a useful offset. The
8888f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichexceptional ones forgo this. We scan the pattern to check that they are fixed
8889f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichlength, and set their lengths. */
8890f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
889153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (errorcode == 0 && cb.check_lookbehind)
8892f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
889353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  PCRE2_UCHAR *cc = (PCRE2_UCHAR *)codestart;
8894f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8895f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  /* Loop, searching for OP_REVERSE items, and process those that do not have
8896f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  their length set. (Actually, it will also re-process any that have a length
8897f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  of zero, but that is a pathological case, and it does no harm.) When we find
889853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  one, we temporarily terminate the branch it is in while we scan it. Note that
889953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  calling find_bracket() with a negative group number returns a pointer to the
890053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  OP_REVERSE item, not the actual lookbehind. */
8901f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
890253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  for (cc = (PCRE2_UCHAR *)PRIV(find_bracket)(codestart, utf, -1);
8903f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich       cc != NULL;
890453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis       cc = (PCRE2_UCHAR *)PRIV(find_bracket)(cc, utf, -1))
8905f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
8906f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    if (GET(cc, 1) == 0)
8907f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
8908f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      int fixed_length;
890953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      int count = 0;
891053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      PCRE2_UCHAR *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
8911f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      int end_op = *be;
8912f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *be = OP_END;
891353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      fixed_length = find_fixedlength(cc, utf, TRUE, &cb, NULL, &count);
8914f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      *be = end_op;
8915f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      if (fixed_length < 0)
8916f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
891753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        errorcode = fixed_length_errors[-fixed_length];
8918f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        break;
8919f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
892053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (fixed_length > cb.max_lookbehind) cb.max_lookbehind = fixed_length;
8921f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      PUT(cc, 1, fixed_length);
8922f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      }
8923f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    cc += 1 + LINK_SIZE;
8924f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
892553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
892653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* The previous value of the maximum lookbehind was transferred to the
892753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  compiled regex block above. We could have updated this value in the loop
892853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  above, but keep the two values in step, just in case some later code below
892953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  uses the cb value. */
893053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
893153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  re->max_lookbehind = cb.max_lookbehind;
8932f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8933f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
893453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Failed to compile, or error while post-processing. Earlier errors get here
893553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisvia the dreaded goto. */
8936f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
8937f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichif (errorcode != 0)
8938f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
893953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  HAD_ERROR:
894053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  *erroroffset = (int)(ptr - pattern);
894153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  HAD_UTF_ERROR:
894253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  *errorptr = errorcode;
894353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  pcre2_code_free(re);
894453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  re = NULL;
894553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  goto EXIT;
8946f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
8947f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
894853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Successful compile. If the anchored option was not passed, set it if
894953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiswe can determine that the pattern is anchored by virtue of ^ characters or \A
895053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisor anything else, such as starting with non-atomic .* when DOTALL is set and
895153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthere are no occurrences of *PRUNE or *SKIP (though there is an option to
895253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisdisable this case). */
895353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
895453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((re->overall_options & PCRE2_ANCHORED) == 0 &&
895553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     is_anchored(codestart, 0, &cb, 0))
895653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  re->overall_options |= PCRE2_ANCHORED;
8957f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
895853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* If the pattern is still not anchored and we do not have a first code unit,
895953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskissee if there is one that is asserted (these are not saved during the compile
896053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisbecause they can cause conflicts with actual literals that follow). This code
896153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisneed not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would
896253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiscreate will not be used. */
8963f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
896453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
8965f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
896653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (firstcuflags < 0)
896753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    firstcu = find_firstassertedcu(codestart, &firstcuflags, FALSE);
8968f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
896953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Save the data for a first code unit. */
8970f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
897153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (firstcuflags >= 0)
897253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
897353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    re->first_codeunit = firstcu;
897453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    re->flags |= PCRE2_FIRSTSET;
8975f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
897653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    /* Handle caseless first code units. */
8977f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
897853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if ((firstcuflags & REQ_CASELESS) != 0)
8979f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich      {
898053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (firstcu < 128 || (!utf && firstcu < 255))
8981f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        {
898253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        if (cb.fcc[firstcu] != firstcu) re->flags |= PCRE2_FIRSTCASELESS;
8983f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich        }
898453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
898553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      /* The first code unit is > 128 in UTF mode, or > 255 otherwise. In
898653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      8-bit UTF mode, codepoints in the range 128-255 are introductory code
898753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      points and cannot have another case. In 16-bit and 32-bit modes, we can
898853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      check wide characters when UTF (and therefore UCP) is supported. */
898953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
899053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
899153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      else if (firstcu <= MAX_UTF_CODE_POINT &&
899253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis               UCD_OTHERCASE(firstcu) != firstcu)
899353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis        re->flags |= PCRE2_FIRSTCASELESS;
8994f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
899553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
8996f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
8997f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
899853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* When there is no first code unit, see if we can set the PCRE2_STARTLINE
899953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  flag. This is helpful for multiline matches when all branches start with ^
900053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  and also when all branches start with non-atomic .* for non-DOTALL matches
900153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  when *PRUNE and SKIP are not present. (There is an option that disables this
900253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  case.) */
9003f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
900453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  else if (is_startline(codestart, 0, &cb, 0)) re->flags |= PCRE2_STARTLINE;
900553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
9006f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
900753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Handle the "required code unit", if one is set. In the case of an anchored
900853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskispattern, do this only if it follows a variable length item in the pattern.
900953e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisAgain, skip this if PCRE2_NO_START_OPTIMIZE is set. */
9010f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
901153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (reqcuflags >= 0 &&
901253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis     ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0 ||
901353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      (reqcuflags & REQ_VARY) != 0))
9014f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
901553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  re->last_codeunit = reqcu;
901653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  re->flags |= PCRE2_LASTSET;
9017f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
901853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  /* Handle caseless required code units as for first code units (above). */
9019f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
902053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if ((reqcuflags & REQ_CASELESS) != 0)
902153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
902253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    if (reqcu < 128 || (!utf && reqcu < 255))
902353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      {
902453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
902553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      }
902653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
902753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
902853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis      re->flags |= PCRE2_LASTCASELESS;
9029f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich#endif
903053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
9031f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
9032f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
9033f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich/* Check for a pattern that can match an empty string, so that this information
9034f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichcan be provided to applications. */
9035f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
9036f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichdo
9037f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  {
903853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int count = 0;
903953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  int rc = could_be_empty_branch(codestart, code, utf, &cb, TRUE, NULL, &count);
904053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (rc < 0)
904153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    {
904253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    errorcode = ERR86;
904353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    goto HAD_ERROR;
904453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    }
904553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  if (rc > 0)
9046f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    {
904753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    re->flags |= PCRE2_MATCH_EMPTY;
9048f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    break;
9049f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich    }
9050f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  codestart += GET(codestart, 1);
9051f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich  }
9052f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevichwhile (*codestart == OP_ALT);
9053f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
905453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Finally, unless PCRE2_NO_START_OPTIMIZE is set, study the compiled pattern
905553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisto set up information such as a bitmap of starting code units and a minimum
905653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskismatching length. */
905753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
905853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
905953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis    PRIV(study)(re) != 0)
906053e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  {
906153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  errorcode = ERR31;
906253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  goto HAD_ERROR;
906353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  }
906453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
906553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* Control ends up here in all cases. If memory was obtained for a
906653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskiszero-terminated copy of the pattern, remember to free it before returning. Also
906753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisfree the list of named groups if a larger one had to be obtained, and likewise
906853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisthe group information vector. */
906953e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
907053e448c1797913dbc12dbacc341559934566d6bcJanis DanisevskisEXIT:
907153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (copied_pattern != stack_copied_pattern)
907253e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ccontext->memctl.free(copied_pattern, ccontext->memctl.memory_data);
907353e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (cb.named_group_list_size > NAMED_GROUP_LIST_SIZE)
907453e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ccontext->memctl.free((void *)cb.named_groups, ccontext->memctl.memory_data);
907553e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisif (cb.groupinfo != c32workspace)
907653e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis  ccontext->memctl.free((void *)cb.groupinfo, ccontext->memctl.memory_data);
907753e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis
907853e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskisreturn re;    /* Will be NULL after an error */
9079f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich}
9080f73ff17bddb7dc18ff9044773dd65d040e8f4fcfNick Kralevich
908153e448c1797913dbc12dbacc341559934566d6bcJanis Danisevskis/* End of pcre2_compile.c */
9082